I tried HaplotypeCaller on whole-genome sample in GVCF mode and it takes several days and ends up incomplete. I would like to use Queue script to parallelize the jobs. I am new to using Queue scripts and framed the below script using the posts in the forum and have errors.
package org.broadinstitute.sting.queue.qscripts
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF
import org.broadinstitute.sting.queue.extensions.gatk._
class HaplotypeCaller extends QScript {
// Create an alias 'qscript' to be able to access variables in the VariantCaller.
// 'qscript' is now the same as 'VariantCaller.this'
qscript =>
// Required arguments. All initialized to empty values.
@Input(doc="The reference file for the bam files.", shortName="R", required=true)
var referenceFile: File = _
@Input(doc="One or more bam files.", shortName="I")
var bamFiles: List[File] = Nil
@Input(doc="Output core filename.", shortName="O", required=true)
var out: File = _
@Argument(doc="Maxmem.", shortName="mem", required=true)
var maxMem: Int = _
@Argument(doc="Number of cpu threads per data thread", shortName="nct", required=true)
var numCPUThreads: Int = _
@Argument(doc="Number of scatters", shortName="nsc", required=true)
var numScatters: Int = _
@Argument(doc="Minimum phred-scaled confidence to call variants", shortName="stand_call_conf", required=true)
var standCallConf: Int = _ //30 //default: best-practices value
@Argument(doc="Minimum phred-scaled confidence to emit variants", shortName="stand_emit_conf", required=true)
var standEmitConf: Int = _ //10 //default: best-practices value
@Argument(doc="Mode for emitting reference confidenc scores", shortName="ERC", required=true)
var EmitRefConfidence: Boolean = true
// The following arguments are all optional.
@Input(doc="An optional file with known SNP sites.", shortName="D", required=false)
var dbsnpFile: File = _
@Input(doc="An optional file with targets intervals.", shortName="L", required=false)
var targetFile: File = _
@Argument(doc="Amount of padding (in bp) to add to each interval", shortName="ip", required=false)
var intervalPadding: Int = 0
def script() {
val haplotypeCaller = new HaplotypeCaller
// All required input
haplotypeCaller.input_file = bamFiles
haplotypeCaller.reference_sequence = referenceFile
haplotypeCaller.out = qscript.out + ".g.vcf"
haplotypeCaller.scatterCount = numScatters
haplotypeCaller.memoryLimit = maxMem
haplotypeCaller.num_cpu_threads_per_data_thread = numCPUThreads
haplotypeCaller.stand_emit_conf = standEmitConf
haplotypeCaller.stand_call_conf = standCallConf
haplotypeCaller.emitRefConfidence = GVCF
// Optional input
if (dbsnpFile != null) {
haplotypeCaller.D = dbsnpFile
}
if (targetFile != null) {
haplotypeCaller.L :+= targetFile
haplotypeCaller.ip = intervalPadding
}
//add function to queue
add(haplotypeCaller)
}
}
The script is run from the command line as follows:
java -jar /proj/lohi/Canine_Tools/GATK-Queue-3.5/Queue.jar -S HaplotypeCaller.scala -R canFam3.fa -I BD01_recalibrated.bam -stand_call_conf 30 -stand_emit_conf 10 -nct 4 -ERC GVCF -O testQueue -run -debug
which throws the error:
`INFO 15:06:10,940 QScriptManager - Compiling 1 QScript
ERROR 15:06:11,096 QScriptManager - HaplotypeCaller.scala:3: object QScript is not a member of package org.broadinstitute.sting.queue
ERROR 15:06:11,102 QScriptManager - import org.broadinstitute.sting.queue.QScript
ERROR 15:06:11,103 QScriptManager - ^
ERROR 15:06:11,129 QScriptManager - HaplotypeCaller.scala:5: object extensions is not a member of package org.broadinstitute.sting.queue
ERROR 15:06:11,133 QScriptManager - import org.broadinstitute.sting.queue.extensions.gatk._
ERROR 15:06:11,134 QScriptManager - ^
ERROR 15:06:11,140 QScriptManager - HaplotypeCaller.scala:7: not found: type QScript
ERROR 15:06:11,144 QScriptManager - class HaplotypeCaller extends QScript {
ERROR 15:06:11,145 QScriptManager - ^
ERROR 15:06:11,331 QScriptManager - HaplotypeCaller.scala:14: not found: type File
ERROR 15:06:11,335 QScriptManager - var referenceFile: File = _
ERROR 15:06:11,335 QScriptManager - ^
ERROR 15:06:11,337 QScriptManager - HaplotypeCaller.scala:13: not found: type Input
ERROR 15:06:11,338 QScriptManager - @Input(doc="The reference file for the bam files.", shortName="R", required=true)
ERROR 15:06:11,339 QScriptManager - ^
ERROR 15:06:11,346 QScriptManager - HaplotypeCaller.scala:17: not found: type File
ERROR 15:06:11,347 QScriptManager - var bamFiles: List[File] = Nil
ERROR 15:06:11,347 QScriptManager - ^
ERROR 15:06:11,349 QScriptManager - HaplotypeCaller.scala:16: not found: type Input
ERROR 15:06:11,350 QScriptManager - @Input(doc="One or more bam files.", shortName="I")
ERROR 15:06:11,351 QScriptManager - ^
ERROR 15:06:11,622 QScriptManager - HaplotypeCaller.scala:20: not found: type File
ERROR 15:06:11,623 QScriptManager - var out: File = _
ERROR 15:06:11,623 QScriptManager - ^
ERROR 15:06:11,625 QScriptManager - HaplotypeCaller.scala:19: not found: type Input
ERROR 15:06:11,625 QScriptManager - @Input(doc="Output core filename.", shortName="O", required=true)
ERROR 15:06:11,626 QScriptManager - ^
ERROR 15:06:11,631 QScriptManager - HaplotypeCaller.scala:22: not found: type Argument
ERROR 15:06:11,632 QScriptManager - @Argument(doc="Maxmem.", shortName="mem", required=true)
ERROR 15:06:11,633 QScriptManager - ^
ERROR 15:06:11,634 QScriptManager - HaplotypeCaller.scala:25: not found: type Argument
ERROR 15:06:11,635 QScriptManager - @Argument(doc="Number of cpu threads per data thread", shortName="nct", required=true)
ERROR 15:06:11,636 QScriptManager - ^
ERROR 15:06:11,637 QScriptManager - HaplotypeCaller.scala:28: not found: type Argument
ERROR 15:06:11,638 QScriptManager - @Argument(doc="Number of scatters", shortName="nsc", required=true)
ERROR 15:06:11,639 QScriptManager - ^
ERROR 15:06:11,640 QScriptManager - HaplotypeCaller.scala:31: not found: type Argument
ERROR 15:06:11,641 QScriptManager - @Argument(doc="Minimum phred-scaled confidence to call variants", shortName="stand_call_conf", required=true)
ERROR 15:06:11,642 QScriptManager - ^
ERROR 15:06:11,643 QScriptManager - HaplotypeCaller.scala:34: not found: type Argument
ERROR 15:06:11,644 QScriptManager - @Argument(doc="Minimum phred-scaled confidence to emit variants", shortName="stand_emit_conf", required=true)
ERROR 15:06:11,644 QScriptManager - ^
ERROR 15:06:11,650 QScriptManager - HaplotypeCaller.scala:37: not found: type Argument
ERROR 15:06:11,651 QScriptManager - @Argument(doc="Mode for emitting reference confidenc scores", shortName="ERC", required=true)
ERROR 15:06:11,652 QScriptManager - ^
ERROR 15:06:11,653 QScriptManager - HaplotypeCaller.scala:43: not found: type File
ERROR 15:06:11,654 QScriptManager - var dbsnpFile: File = _
ERROR 15:06:11,654 QScriptManager - ^
ERROR 15:06:11,655 QScriptManager - HaplotypeCaller.scala:42: not found: type Input
ERROR 15:06:11,656 QScriptManager - @Input(doc="An optional file with known SNP sites.", shortName="D", required=false)
ERROR 15:06:11,656 QScriptManager - ^
ERROR 15:06:11,657 QScriptManager - HaplotypeCaller.scala:46: not found: type File
ERROR 15:06:11,658 QScriptManager - var targetFile: File = _
ERROR 15:06:11,658 QScriptManager - ^
ERROR 15:06:11,659 QScriptManager - HaplotypeCaller.scala:45: not found: type Input
ERROR 15:06:11,660 QScriptManager - @Input(doc="An optional file with targets intervals.", shortName="L", required=false)
ERROR 15:06:11,660 QScriptManager - ^
ERROR 15:06:11,662 QScriptManager - HaplotypeCaller.scala:48: not found: type Argument
ERROR 15:06:11,662 QScriptManager - @Argument(doc="Amount of padding (in bp) to add to each interval", shortName="ip", required=false)
ERROR 15:06:11,663 QScriptManager - ^
ERROR 15:06:11,888 QScriptManager - HaplotypeCaller.scala:55: value input_file is not a member of org.broadinstitute.sting.queue.qscripts.HaplotypeCaller
ERROR 15:06:11,888 QScriptManager - haplotypeCaller.input_file = bamFiles
ERROR 15:06:11,889 QScriptManager - ^
ERROR 15:06:11,896 QScriptManager - HaplotypeCaller.scala:56: value reference_sequence is not a member of org.broadinstitute.sting.queue.qscripts.HaplotypeCaller
ERROR 15:06:11,897 QScriptManager - haplotypeCaller.reference_sequence = referenceFile
ERROR 15:06:11,897 QScriptManager - ^
ERROR 15:06:11,907 QScriptManager - HaplotypeCaller.scala:59: value scatterCount is not a member of org.broadinstitute.sting.queue.qscripts.HaplotypeCaller
ERROR 15:06:11,908 QScriptManager - haplotypeCaller.scatterCount = numScatters
ERROR 15:06:11,909 QScriptManager - ^
ERROR 15:06:11,915 QScriptManager - HaplotypeCaller.scala:60: value memoryLimit is not a member of org.broadinstitute.sting.queue.qscripts.HaplotypeCaller
ERROR 15:06:11,916 QScriptManager - haplotypeCaller.memoryLimit = maxMem
ERROR 15:06:11,916 QScriptManager - ^
ERROR 15:06:11,923 QScriptManager - HaplotypeCaller.scala:61: value num_cpu_threads_per_data_thread is not a member of org.broadinstitute.sting.queue.qscripts.HaplotypeCaller
ERROR 15:06:11,923 QScriptManager - haplotypeCaller.num_cpu_threads_per_data_thread = numCPUThreads
ERROR 15:06:11,924 QScriptManager - ^
ERROR 15:06:11,930 QScriptManager - HaplotypeCaller.scala:63: value stand_emit_conf is not a member of org.broadinstitute.sting.queue.qscripts.HaplotypeCaller
ERROR 15:06:11,930 QScriptManager - haplotypeCaller.stand_emit_conf = standEmitConf
ERROR 15:06:11,931 QScriptManager - ^
ERROR 15:06:11,937 QScriptManager - HaplotypeCaller.scala:64: value stand_call_conf is not a member of org.broadinstitute.sting.queue.qscripts.HaplotypeCaller
ERROR 15:06:11,938 QScriptManager - haplotypeCaller.stand_call_conf = standCallConf
ERROR 15:06:11,938 QScriptManager - ^
ERROR 15:06:11,944 QScriptManager - HaplotypeCaller.scala:65: value emitRefConfidence is not a member of org.broadinstitute.sting.queue.qscripts.HaplotypeCaller
ERROR 15:06:11,945 QScriptManager - haplotypeCaller.emitRefConfidence = GVCF
ERROR 15:06:11,945 QScriptManager - ^
ERROR 15:06:11,952 QScriptManager - HaplotypeCaller.scala:68: value D is not a member of org.broadinstitute.sting.queue.qscripts.HaplotypeCaller
ERROR 15:06:11,952 QScriptManager - haplotypeCaller.D = dbsnpFile
ERROR 15:06:11,953 QScriptManager - ^
ERROR 15:06:11,960 QScriptManager - HaplotypeCaller.scala:71: value L is not a member of org.broadinstitute.sting.queue.qscripts.HaplotypeCaller
ERROR 15:06:11,961 QScriptManager - haplotypeCaller.L :+= targetFile
ERROR 15:06:11,961 QScriptManager - ^
ERROR 15:06:11,967 QScriptManager - HaplotypeCaller.scala:72: value ip is not a member of org.broadinstitute.sting.queue.qscripts.HaplotypeCaller
ERROR 15:06:11,967 QScriptManager - haplotypeCaller.ip = intervalPadding
ERROR 15:06:11,968 QScriptManager - ^
ERROR 15:06:11,970 QScriptManager - HaplotypeCaller.scala:76: not found: value add
ERROR 15:06:11,971 QScriptManager - add(haplotypeCaller)
ERROR 15:06:11,971 QScriptManager - ^
ERROR 15:06:11,981 QScriptManager - 64 errors found
##### ERROR ------------------------------------------------------------------------------------------
##### ERROR stack trace
org.broadinstitute.gatk.queue.QException: Compile of HaplotypeCaller.scala failed with 64 errors
at org.broadinstitute.gatk.queue.QScriptManager.loadScripts(QScriptManager.scala:79)
at org.broadinstitute.gatk.queue.QCommandLine.org$broadinstitute$gatk$queue$QCommandLine$$qScriptPluginManager$lzycompute(QCommandLine.scala:94)
at org.broadinstitute.gatk.queue.QCommandLine.org$broadinstitute$gatk$queue$QCommandLine$$qScriptPluginManager(QCommandLine.scala:92)
at org.broadinstitute.gatk.queue.QCommandLine.getArgumentSources(QCommandLine.scala:229)
at org.broadinstitute.gatk.utils.commandline.CommandLineProgram.start(CommandLineProgram.java:205)
at org.broadinstitute.gatk.utils.commandline.CommandLineProgram.start(CommandLineProgram.java:155)
at org.broadinstitute.gatk.queue.QCommandLine$.main(QCommandLine.scala:61)
at org.broadinstitute.gatk.queue.QCommandLine.main(QCommandLine.scala)
##### ERROR ------------------------------------------------------------------------------------------
##### ERROR A GATK RUNTIME ERROR has occurred (version 3.5-0-g36282e4):
##### ERROR
##### ERROR This might be a bug. Please check the documentation guide to see if this is a known problem.
##### ERROR If not, please post the error message, with stack trace, to the GATK forum.
##### ERROR Visit our website and forum for extensive documentation and answers to
##### ERROR commonly asked questions http://www.broadinstitute.org/gatk
##### ERROR
##### ERROR MESSAGE: Compile of HaplotypeCaller.scala failed with 64 errors
##### ERROR ------------------------------------------------------------------------------------------
INFO 15:06:12,064 QCommandLine - Shutting down jobs. Please wait...
`
Could anyone offer some suggestions to fix this. Any help is appreciated.