Mercurial > repos > pitagora > gatk_1_6_pitagora
changeset 5:04a1efe6021e default tip
commit
author | pitagora <ryota.yamanaka@riken.jp> |
---|---|
date | Thu, 16 Apr 2015 11:50:32 +0900 |
parents | 2f7daeeaf4aa |
children | |
files | GATKCombineVariants.py GATKCombineVariants.xml GATKCountCovariate.py GATKCountCovariate.xml GATKIndelRealigner.py GATKSelectVariants.xml GATKTableRecalibration.xml GATKUnifiedGenotyper.xml |
diffstat | 8 files changed, 222 insertions(+), 213 deletions(-) [+] |
line wrap: on
line diff
--- a/GATKCombineVariants.py Thu Apr 16 10:53:16 2015 +0900 +++ b/GATKCombineVariants.py Thu Apr 16 11:50:32 2015 +0900 @@ -9,13 +9,14 @@ try: descr = "GATKCombineVariants.py: version 1.0. \n" parser = argparse.ArgumentParser(description=descr) + parser.add_argument( '-J', '--J', help='java jar path' ) parser.add_argument( '-S', '--S', help='snp vcf file' ) parser.add_argument( '-I', '--I', help='indel vcf file' ) parser.add_argument( '-R', '--R', help='reference genome file' ) parser.add_argument( '-O', '--O', help='output file' ) args = parser.parse_args() - cmd = "java -Xmx2g -jar GenomeAnalysisTK.jar -R " + args.R + " -T CombineVariants -V:SNV,vcf " + args.S + " -V:INDEL,vcf " + args.I + " -o " + args.O + cmd = "java -Xmx2g -jar " + args.J + "/GenomeAnalysisTK.jar -R " + args.R + " -T CombineVariants -V:SNV,vcf " + args.S + " -V:INDEL,vcf " + args.I + " -o " + args.O print "The cmd is %s" % cmd proc = subprocess.Popen( args=cmd, shell=True )
--- a/GATKCombineVariants.xml Thu Apr 16 10:53:16 2015 +0900 +++ b/GATKCombineVariants.xml Thu Apr 16 11:50:32 2015 +0900 @@ -1,25 +1,26 @@ <tool id="GATK_combine_variants" name="GATK Combine Variants" version="1.6.13"> - <description></description> - <requirements> - <requirement type="package" version="1.6.13">gatk</requirement> - </requirements> - <command interpreter="python"> - GATKCombineVariants.py - --S $snp - --I $indel - --R $db_refgenomes - --O $output - </command> - <inputs> - <param type="data" name="snp" format="vcf" label="SNP vcf file" /> - <param type="data" name="indel" format="vcf" label="INDEL vcf file" /> - <param type="select" name="db_refgenomes" label="Reference genome [-R]"> - <options from_data_table="gatk_refgenomes" /> - </param> - </inputs> - <outputs> - <data format="vcf" name="output" /> - </outputs> - <help> - </help> + <description></description> + <requirements> + <requirement type="package" version="1.6.13">gatk</requirement> + </requirements> + <command interpreter="python"> + GATKCombineVariants.py + --J \$JAVA_JAR_PATH + --S $snp + --I $indel + --R $db_refgenomes + --O $output + </command> + <inputs> + <param type="data" name="snp" format="vcf" label="SNP vcf file" /> + <param type="data" name="indel" format="vcf" label="INDEL vcf file" /> + <param type="select" name="db_refgenomes" label="Reference genome [-R]"> + <options from_data_table="gatk_refgenomes" /> + </param> + </inputs> + <outputs> + <data format="vcf" name="output" /> + </outputs> + <help> + </help> </tool>
--- a/GATKCountCovariate.py Thu Apr 16 10:53:16 2015 +0900 +++ b/GATKCountCovariate.py Thu Apr 16 11:50:32 2015 +0900 @@ -6,40 +6,41 @@ import traceback def __main__(): - try: - descr = "GATKCountCovariate.py: version 1.0. kick a GATK Count Covariate command. \n" - parser = argparse.ArgumentParser(description=descr) - parser.add_argument( '-I', '--I', help='input file' ) - parser.add_argument( '-O', '--O', help='output file' ) - parser.add_argument( '-R', '--R', help='reference genome file' ) - parser.add_argument( '-KN', '--KN', help='known sites' ) - parser.add_argument( '-nt', '--nt', help='process count' ) - parser.add_argument( '-log', '--log', help='log file' ) - args = parser.parse_args() - - target_dir = os.getcwd() - gatk_filename = os.path.join( target_dir, "gatk_file.bam" ) - os.symlink( args.I, gatk_filename ) - - cmd = "samtools index " + gatk_filename - print "The cmd is %s " % cmd - - proc = subprocess.Popen( args=cmd, shell=True ) - rtncode = proc.wait() - - if rtncode != 0: - raise Exception - - cmd = "java -Xmx2g -jar GenomeAnalysisTK.jar -T CountCovariates -I " + gatk_filename + " -R " + args.R + " -knownSites:dbsnp,VCF " + args.KN + " -nt " + args.nt + " -l INFO -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate -log " + args.log + " -recalFile " + args.O - print "The cmd is %s" % cmd - - proc = subprocess.Popen( args=cmd, shell=True ) - rtncode = proc.wait() - - if rtncode != 0: - raise Exception - except Exception: - err = traceback.format_exc(None) - print err + try: + descr = "GATKCountCovariate.py: version 1.0. kick a GATK Count Covariate command. \n" + parser = argparse.ArgumentParser(description=descr) + parser.add_argument( '-J', '--J', help='java jar path' ) + parser.add_argument( '-I', '--I', help='input file' ) + parser.add_argument( '-O', '--O', help='output file' ) + parser.add_argument( '-R', '--R', help='reference genome file' ) + parser.add_argument( '-KN', '--KN', help='known sites' ) + parser.add_argument( '-nt', '--nt', help='process count' ) + parser.add_argument( '-log', '--log', help='log file' ) + args = parser.parse_args() + + target_dir = os.getcwd() + gatk_filename = os.path.join( target_dir, "gatk_file.bam" ) + os.symlink( args.I, gatk_filename ) + + cmd = "samtools index " + gatk_filename + print "The cmd is %s " % cmd + + proc = subprocess.Popen( args=cmd, shell=True ) + rtncode = proc.wait() + + if rtncode != 0: + raise Exception + + cmd = "java -Xmx2g -jar " + args.J + "/GenomeAnalysisTK.jar -T CountCovariates -I " + gatk_filename + " -R " + args.R + " -knownSites:dbsnp,VCF " + args.KN + " -nt " + args.nt + " -l INFO -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate -log " + args.log + " -recalFile " + args.O + print "The cmd is %s" % cmd + + proc = subprocess.Popen( args=cmd, shell=True ) + rtncode = proc.wait() + + if rtncode != 0: + raise Exception + except Exception: + err = traceback.format_exc(None) + print err if __name__=="__main__": __main__()
--- a/GATKCountCovariate.xml Thu Apr 16 10:53:16 2015 +0900 +++ b/GATKCountCovariate.xml Thu Apr 16 11:50:32 2015 +0900 @@ -1,31 +1,33 @@ <tool id="GATK_count_covariate" name="GATK Count Covariate" version="1.6.13"> - <description></description> - <requirements> - <requirement type="package" version="1.6.13">gatk</requirement> - </requirements> - <command interpreter="python"> - GATKCountCovariate.py - --I $input - --O $output - --R $db_refgenomes - --KN $db_snps - --nt $nt - --log $log - </command> - <inputs> - <param type="data" name="input" format="bam" label="Input file" /> - <param type="select" name="db_refgenomes" label="Reference genome [-R]"> - <options from_data_table="gatk_refgenomes" /> - </param> - <param type="select" name="db_snps" label="Known sites [-knownSites]"> - <options from_data_table="gatk_snps" /> - </param> - <param type="integer" name="nt" value="0" label="processes num [-nt]" help="" /> - </inputs> - <outputs> - <data format="csv" name="output" /> - <data format="txt" name="log" /> - </outputs> - <help> - </help> + <description></description> + <requirements> + <requirement type="package" version="1.6.13">gatk</requirement> + <requirement type="package" version="0.1.19">samtools</requirement> + </requirements> + <command interpreter="python"> + GATKCountCovariate.py + --J \$JAVA_JAR_PATH + --I $input + --O $output + --R $db_refgenomes + --KN $db_snps + --nt $nt + --log $log + </command> + <inputs> + <param type="data" name="input" format="bam" label="Input file" /> + <param type="select" name="db_refgenomes" label="Reference genome [-R]"> + <options from_data_table="gatk_refgenomes" /> + </param> + <param type="select" name="db_snps" label="Known sites [-knownSites]"> + <options from_data_table="gatk_snps" /> + </param> + <param type="integer" name="nt" value="0" label="processes num [-nt]" help="" /> + </inputs> + <outputs> + <data format="csv" name="output" /> + <data format="txt" name="log" /> + </outputs> + <help> + </help> </tool>
--- a/GATKIndelRealigner.py Thu Apr 16 10:53:16 2015 +0900 +++ b/GATKIndelRealigner.py Thu Apr 16 11:50:32 2015 +0900 @@ -6,42 +6,43 @@ import traceback def __main__(): - try: - GATK_DIR = "/home/ubuntu/ext_tools/GenomeAnalysisTK-1.6-13-g91f02df" - - descr = "GATKIndelRealigner.py: version 1.0. kick a GATK Indel Realigner command. \n" - parser = argparse.ArgumentParser(description=descr) - parser.add_argument( '-I', '--I', help='input file' ) - parser.add_argument( '-O', '--O', help='output file' ) - parser.add_argument( '-T', '--T', help='target intervals file' ) - parser.add_argument( '-R', '--R', help='reference genome file' ) - parser.add_argument( '-log', '--log', help='log file' ) - args = parser.parse_args() - - target_dir = os.getcwd() - gatk_filename = os.path.join( target_dir, "gatk_file.bam" ) - os.symlink( args.I, gatk_filename ) - - intervals_filename = os.path.join( target_dir, "intervals_file.intervals" ) - os.symlink( args.T, intervals_filename ) - - cmd = "samtools index " + gatk_filename - print "The cmd is %s" % cmd - - proc = subprocess.Popen( args=cmd, shell=True ) - rtncode = proc.wait() - if rtncode != 0: - raise Exception - - cmd = "java -Xmx22g -jar " + GATK_DIR + "/GenomeAnalysisTK.jar -T IndelRealigner -R " + args.R + " -I " + gatk_filename + " -targetIntervals " + intervals_filename + " -log " + args.log + " -o " + args.O - print "The cmd is %s" % cmd - - proc = subprocess.Popen( args=cmd, shell=True ) - rtncode = proc.wait() - if rtncode != 0: - raise Exception - except Exception: - err = traceback.format_exc(None) - print err + try: + GATK_DIR = "/home/ubuntu/ext_tools/GenomeAnalysisTK-1.6-13-g91f02df" + + descr = "GATKIndelRealigner.py: version 1.0. kick a GATK Indel Realigner command. \n" + parser = argparse.ArgumentParser(description=descr) + parser.add_argument( '-J', '--J', help='java jar path' ) + parser.add_argument( '-I', '--I', help='input file' ) + parser.add_argument( '-O', '--O', help='output file' ) + parser.add_argument( '-T', '--T', help='target intervals file' ) + parser.add_argument( '-R', '--R', help='reference genome file' ) + parser.add_argument( '-log', '--log', help='log file' ) + args = parser.parse_args() + + target_dir = os.getcwd() + gatk_filename = os.path.join( target_dir, "gatk_file.bam" ) + os.symlink( args.I, gatk_filename ) + + intervals_filename = os.path.join( target_dir, "intervals_file.intervals" ) + os.symlink( args.T, intervals_filename ) + + cmd = "samtools index " + gatk_filename + print "The cmd is %s" % cmd + + proc = subprocess.Popen( args=cmd, shell=True ) + rtncode = proc.wait() + if rtncode != 0: + raise Exception + + cmd = "java -Xmx2g -jar " + args.J + "/GenomeAnalysisTK.jar -T IndelRealigner -R " + args.R + " -I " + gatk_filename + " -targetIntervals " + intervals_filename + " -log " + args.log + " -o " + args.O + print "The cmd is %s" % cmd + + proc = subprocess.Popen( args=cmd, shell=True ) + rtncode = proc.wait() + if rtncode != 0: + raise Exception + except Exception: + err = traceback.format_exc(None) + print err if __name__=="__main__": __main__()
--- a/GATKSelectVariants.xml Thu Apr 16 10:53:16 2015 +0900 +++ b/GATKSelectVariants.xml Thu Apr 16 11:50:32 2015 +0900 @@ -1,32 +1,33 @@ <tool id="GATK_select_variants" name="GATK Select Variants" version="1.6.13"> - <description></description> - <requirements> - <requirement type="package" version="1.6.13">gatk</requirement> - </requirements> - <command interpreter="python"> - GATKSelectVariants.py - --I $input - --O $output - --R $db_refgenomes - --nt $nt - --st $select_type - --log $log - </command> - <inputs> - <param type="data" name="input" format="vcf" label="Input file" /> - <param type="select" name="db_refgenomes" label="Reference genome [-R]"> - <options from_data_table="gatk_refgenomes" /> - </param> + <description></description> + <requirements> + <requirement type="package" version="1.6.13">gatk</requirement> + </requirements> + <command interpreter="python"> + GATKSelectVariants.py + --J \$JAVA_JAR_PATH + --I $input + --O $output + --R $db_refgenomes + --nt $nt + --st $select_type + --log $log + </command> + <inputs> + <param type="data" name="input" format="vcf" label="Input file" /> + <param type="select" name="db_refgenomes" label="Reference genome [-R]"> + <options from_data_table="gatk_refgenomes" /> + </param> <param type="select" name="select_type" label="select type [-selectType]"> <option value="SNP">SNP</option> <option value="INDEL">INDEL</option> </param> - <param type="integer" name="nt" value="0" label="processes num [-nt]" help="" /> - </inputs> - <outputs> - <data format="vcf" name="output" /> - <data format="txt" name="log" /> - </outputs> - <help> - </help> + <param type="integer" name="nt" value="0" label="processes num [-nt]" help="" /> + </inputs> + <outputs> + <data format="vcf" name="output" /> + <data format="txt" name="log" /> + </outputs> + <help> + </help> </tool>
--- a/GATKTableRecalibration.xml Thu Apr 16 10:53:16 2015 +0900 +++ b/GATKTableRecalibration.xml Thu Apr 16 11:50:32 2015 +0900 @@ -1,27 +1,28 @@ <tool id="GATK_table_recalibration" name="GATK Table Recalibration" version="1.6.13"> - <description></description> - <requirements> - <requirement type="package" version="1.6.13">gatk</requirement> - </requirements> - <command interpreter="python"> - GATKTableRecalibration.py - --I $input - --O $output - --R $db_refgenomes - --C $recal_csv - --log $log - </command> - <inputs> - <param type="data" name="input" format="bam" label="Input file" /> - <param type="select" name="db_refgenomes" label="Reference genome [-R]"> - <options from_data_table="gatk_refgenomes" /> - </param> - <param type="data" name="recal_csv" format="csv" label="Recal file [-recalFile]" /> - </inputs> - <outputs> - <data format="bam" name="output" /> - <data format="txt" name="log" /> - </outputs> - <help> - </help> + <description></description> + <requirements> + <requirement type="package" version="1.6.13">gatk</requirement> + </requirements> + <command interpreter="python"> + GATKTableRecalibration.py + --J \$JAVA_JAR_PATH + --I $input + --O $output + --R $db_refgenomes + --C $recal_csv + --log $log + </command> + <inputs> + <param type="data" name="input" format="bam" label="Input file" /> + <param type="select" name="db_refgenomes" label="Reference genome [-R]"> + <options from_data_table="gatk_refgenomes" /> + </param> + <param type="data" name="recal_csv" format="csv" label="Recal file [-recalFile]" /> + </inputs> + <outputs> + <data format="bam" name="output" /> + <data format="txt" name="log" /> + </outputs> + <help> + </help> </tool>
--- a/GATKUnifiedGenotyper.xml Thu Apr 16 10:53:16 2015 +0900 +++ b/GATKUnifiedGenotyper.xml Thu Apr 16 11:50:32 2015 +0900 @@ -1,39 +1,40 @@ <tool id="GATK_unified_genotyper" name="GATK Unified Genotyper" version="1.6.13"> - <description></description> - <requirements> - <requirement type="package" version="1.6.13">gatk</requirement> - </requirements> - <command interpreter="python"> - GATKUnifiedGenotyper.py - --I $input - --O $output - --R $db_refgenomes - --L $db_beds - --nt $nt - --stc $stc - --sec $sec - --log $log - --D $db_snps - </command> - <inputs> - <param type="data" name="input" format="bam" label="Input file" /> - <param type="select" name="db_refgenomes" label="Reference genome [-R]"> - <options from_data_table="gatk_refgenomes" /> - </param> - <param type="select" name="db_beds" label="Bed [-L]"> - <options from_data_table="gatk_beds" /> - </param> - <param type="integer" name="nt" value="0" label="processes num [-nt]" /> - <param type="float" name="stc" value="50.0" label="stand call conf [-stand_call_conf]" /> - <param type="float" name="sec" value="10.0" label="stand emit conf [-stand_emit_conf]" /> - <param type="select" name="db_snps" label="dbSNP vcf file [-D]"> - <options from_data_table="gatk_snps" /> - </param> - </inputs> - <outputs> - <data format="vcf" name="output" /> - <data format="txt" name="log" /> - </outputs> - <help> - </help> + <description></description> + <requirements> + <requirement type="package" version="1.6.13">gatk</requirement> + </requirements> + <command interpreter="python"> + GATKUnifiedGenotyper.py + --J \$JAVA_JAR_PATH + --I $input + --O $output + --R $db_refgenomes + --L $db_beds + --nt $nt + --stc $stc + --sec $sec + --log $log + --D $db_snps + </command> + <inputs> + <param type="data" name="input" format="bam" label="Input file" /> + <param type="select" name="db_refgenomes" label="Reference genome [-R]"> + <options from_data_table="gatk_refgenomes" /> + </param> + <param type="select" name="db_beds" label="Bed [-L]"> + <options from_data_table="gatk_beds" /> + </param> + <param type="integer" name="nt" value="0" label="processes num [-nt]" /> + <param type="float" name="stc" value="50.0" label="stand call conf [-stand_call_conf]" /> + <param type="float" name="sec" value="10.0" label="stand emit conf [-stand_emit_conf]" /> + <param type="select" name="db_snps" label="dbSNP vcf file [-D]"> + <options from_data_table="gatk_snps" /> + </param> + </inputs> + <outputs> + <data format="vcf" name="output" /> + <data format="txt" name="log" /> + </outputs> + <help> + </help> </tool>