Mercurial > repos > pitagora > gatk_1_6_pitagora
changeset 0:5bad325a92d7
commit
author | pitagora <ryota.yamanaka@riken.jp> |
---|---|
date | Wed, 15 Apr 2015 18:41:05 +0900 |
parents | |
children | 9b55ddff33e2 |
files | GATKCombineVariants.py GATKCombineVariants.xml GATKCountCovariate.py GATKCountCovariate.xml GATKIndelRealigner.py GATKRealignerTargetCreator.py GATKRealignerTargetCreator.xml GATKSelectVariants.xml GATKTableRecalibration.xml GATKUnifiedGenotyper.xml tool_dependencies.xml |
diffstat | 11 files changed, 363 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Run GATK CombineVariants on a SNP VCF and an INDEL VCF.

Origin: b/GATKCombineVariants.py, added in changeset 0:5bad325a92d7.

The java command line is echoed to stdout and then executed.  On any
failure the traceback is written to stderr and the script exits with
status 1, so the caller can tell that the job did not succeed.
"""

import argparse
import optparse
import os
import shutil
import subprocess
import sys
import tempfile
import traceback


def _run(argv):
    """Echo and execute one external command given as a list of strings.

    The command is executed directly (no shell), so file names containing
    spaces or shell metacharacters are passed through unchanged.

    Raises:
        RuntimeError: if the command exits with a non-zero status.
        OSError: if the executable cannot be started.
    """
    # print() with one %-formatted argument behaves the same on Python 2 and 3.
    print("The cmd is %s" % " ".join(argv))
    # Flush so our echo appears before the child's own output on the same fd.
    sys.stdout.flush()
    rtncode = subprocess.call(argv)
    if rtncode != 0:
        raise RuntimeError(
            "command exited with status %d: %s" % (rtncode, " ".join(argv))
        )


def __main__():
    """Parse the command line and run GATK CombineVariants.

    Options (all required): --S SNP VCF, --I INDEL VCF, --R reference
    genome, --O output file.  Exits with status 1 if GATK fails.
    """
    descr = "GATKCombineVariants.py: version 1.0. \n"
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('-S', '--S', required=True, help='snp vcf file')
    parser.add_argument('-I', '--I', required=True, help='indel vcf file')
    parser.add_argument('-R', '--R', required=True, help='reference genome file')
    parser.add_argument('-O', '--O', required=True, help='output file')
    args = parser.parse_args()

    # NOTE(review): the jar is referenced by bare file name, so it is resolved
    # relative to the current working directory -- confirm how the deployment
    # makes GenomeAnalysisTK.jar available there.
    gatk_cmd = [
        "java", "-Xmx2g", "-jar", "GenomeAnalysisTK.jar",
        "-R", args.R,
        "-T", "CombineVariants",
        "-V:SNV,vcf", args.S,
        "-V:INDEL,vcf", args.I,
        "-o", args.O,
    ]

    try:
        _run(gatk_cmd)
    except Exception:
        # Top-level boundary: report on stderr and fail loudly instead of
        # printing the traceback to stdout and exiting 0.
        sys.stderr.write(traceback.format_exc())
        sys.exit(1)


if __name__ == "__main__":
    __main__()
<!-- Origin: b/GATKCombineVariants.xml, added in changeset 0:5bad325a92d7. -->
<!-- Galaxy tool definition that hands two VCF datasets, a reference genome
     and an output path to GATKCombineVariants.py. -->
<tool name="GATK Combine Variants" id="GATK_combine_variants" version="1.6.13">
    <description/>
    <requirements>
        <requirement version="1.6.13" type="package">gatk</requirement>
    </requirements>
    <!-- Each option below maps one-to-one onto an argparse option of the script. -->
    <command interpreter="python">
        GATKCombineVariants.py
        --S $snp
        --I $indel
        --R $db_refgenomes
        --O $output
    </command>
    <inputs>
        <param name="snp" type="data" format="vcf" label="SNP vcf file"/>
        <param name="indel" type="data" format="vcf" label="INDEL vcf file"/>
        <!-- Choices are read from the gatk_refgenomes data table. -->
        <param name="db_refgenomes" type="select" label="Reference genome [-R]">
            <options from_data_table="gatk_refgenomes"/>
        </param>
    </inputs>
    <outputs>
        <data name="output" format="vcf"/>
    </outputs>
    <help>
    </help>
</tool>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Index a BAM file with samtools and run GATK CountCovariates on it.

Origin: b/GATKCountCovariate.py, added in changeset 0:5bad325a92d7.

Each external command is echoed to stdout and then executed.  On any
failure the traceback is written to stderr and the script exits with
status 1, so the caller can tell that the job did not succeed.
"""

import argparse
import optparse
import os
import shutil
import subprocess
import sys
import tempfile
import traceback


def _run(argv):
    """Echo and execute one external command given as a list of strings.

    The command is executed directly (no shell), so file names containing
    spaces or shell metacharacters are passed through unchanged.

    Raises:
        RuntimeError: if the command exits with a non-zero status.
        OSError: if the executable cannot be started.
    """
    # print() with one %-formatted argument behaves the same on Python 2 and 3.
    print("The cmd is %s" % " ".join(argv))
    # Flush so our echo appears before the child's own output on the same fd.
    sys.stdout.flush()
    rtncode = subprocess.call(argv)
    if rtncode != 0:
        raise RuntimeError(
            "command exited with status %d: %s" % (rtncode, " ".join(argv))
        )


def _link_into_cwd(source, link_name):
    """Symlink *source* as *link_name* in the current directory; return the link path.

    A stale symlink of the same name is replaced.  A regular file of that
    name is left alone, in which case os.symlink raises OSError as before.
    """
    link_path = os.path.join(os.getcwd(), link_name)
    if os.path.islink(link_path):
        os.unlink(link_path)
    os.symlink(source, link_path)
    return link_path


def __main__():
    """Parse the command line, index the input BAM and run CountCovariates.

    Options (all required): --I input BAM, --O recalibration output file,
    --R reference genome, --KN known-sites VCF, --nt thread count passed to
    GATK, --log GATK log file.  Exits with status 1 if any step fails.
    """
    descr = "GATKCountCovariate.py: version 1.0. kick a GATK Count Covariate command. \n"
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('-I', '--I', required=True, help='input file')
    parser.add_argument('-O', '--O', required=True, help='output file')
    parser.add_argument('-R', '--R', required=True, help='reference genome file')
    parser.add_argument('-KN', '--KN', required=True, help='known sites')
    parser.add_argument('-nt', '--nt', required=True, help='process count')
    parser.add_argument('-log', '--log', required=True, help='log file')
    args = parser.parse_args()

    try:
        # The BAM is indexed through a link in the working directory;
        # presumably so the index file is created next to the link rather
        # than next to the original input -- TODO confirm.
        gatk_filename = _link_into_cwd(args.I, "gatk_file.bam")

        _run(["samtools", "index", gatk_filename])

        # NOTE(review): the jar is referenced by bare file name, so it is
        # resolved relative to the current working directory -- confirm how
        # the deployment makes GenomeAnalysisTK.jar available there.
        _run([
            "java", "-Xmx2g", "-jar", "GenomeAnalysisTK.jar",
            "-T", "CountCovariates",
            "-I", gatk_filename,
            "-R", args.R,
            "-knownSites:dbsnp,VCF", args.KN,
            "-nt", args.nt,
            "-l", "INFO",
            "-cov", "ReadGroupCovariate",
            "-cov", "QualityScoreCovariate",
            "-cov", "CycleCovariate",
            "-cov", "DinucCovariate",
            "-log", args.log,
            "-recalFile", args.O,
        ])
    except Exception:
        # Top-level boundary: report on stderr and fail loudly instead of
        # printing the traceback to stdout and exiting 0.
        sys.stderr.write(traceback.format_exc())
        sys.exit(1)


if __name__ == "__main__":
    __main__()
<!-- Origin: b/GATKCountCovariate.xml, added in changeset 0:5bad325a92d7. -->
<!-- Galaxy tool definition that hands a BAM dataset, a reference genome, a
     known-sites file and a thread count to GATKCountCovariate.py. -->
<tool id="GATK_count_covariate" name="GATK Count Covariate" version="1.6.13">
    <description></description>
    <requirements>
        <requirement type="package" version="1.6.13">gatk</requirement>
        <!-- GATKCountCovariate.py runs "samtools index" before calling GATK,
             so samtools must be declared as well; the version matches the
             samtools package listed in tool_dependencies.xml. -->
        <requirement type="package" version="0.1.19">samtools</requirement>
    </requirements>
    <!-- Each option below maps one-to-one onto an argparse option of the script. -->
    <command interpreter="python">
        GATKCountCovariate.py
        --I $input
        --O $output
        --R $db_refgenomes
        --KN $db_snps
        --nt $nt
        --log $log
    </command>
    <inputs>
        <param type="data" name="input" format="bam" label="Input file" />
        <!-- Choices are read from the gatk_refgenomes data table. -->
        <param type="select" name="db_refgenomes" label="Reference genome [-R]">
            <options from_data_table="gatk_refgenomes" />
        </param>
        <!-- Choices are read from the gatk_snps data table. -->
        <param type="select" name="db_snps" label="Known sites [-knownSites]">
            <options from_data_table="gatk_snps" />
        </param>
        <param type="integer" name="nt" value="0" label="processes num [-nt]" help="" />
    </inputs>
    <outputs>
        <data format="csv" name="output" />
        <data format="txt" name="log" />
    </outputs>
    <help>
    </help>
</tool>
#!/home/ubuntu/galaxy-python/python
# -*- coding: utf-8 -*-
"""Index a BAM file with samtools and run GATK IndelRealigner on it.

Origin: b/GATKIndelRealigner.py, added in changeset 0:5bad325a92d7.

Each external command is echoed to stdout and then executed.  On any
failure the traceback is written to stderr and the script exits with
status 1, so the caller can tell that the job did not succeed.

NOTE(review): the interpreter path in the shebang and GATK_DIR below are
machine-specific absolute paths, unlike the sibling GATK wrapper scripts in
this changeset -- confirm they are valid on the target host.
"""

import argparse
import optparse
import os
import shutil
import subprocess
import sys
import tempfile
import traceback


def _run(argv):
    """Echo and execute one external command given as a list of strings.

    The command is executed directly (no shell), so file names containing
    spaces or shell metacharacters are passed through unchanged.

    Raises:
        RuntimeError: if the command exits with a non-zero status.
        OSError: if the executable cannot be started.
    """
    # print() with one %-formatted argument behaves the same on Python 2 and 3.
    print("The cmd is %s" % " ".join(argv))
    # Flush so our echo appears before the child's own output on the same fd.
    sys.stdout.flush()
    rtncode = subprocess.call(argv)
    if rtncode != 0:
        raise RuntimeError(
            "command exited with status %d: %s" % (rtncode, " ".join(argv))
        )


def _link_into_cwd(source, link_name):
    """Symlink *source* as *link_name* in the current directory; return the link path.

    A stale symlink of the same name is replaced.  A regular file of that
    name is left alone, in which case os.symlink raises OSError as before.
    """
    link_path = os.path.join(os.getcwd(), link_name)
    if os.path.islink(link_path):
        os.unlink(link_path)
    os.symlink(source, link_path)
    return link_path


def __main__():
    """Parse the command line, index the input BAM and run IndelRealigner.

    Options (all required): --I input BAM, --O output file, --T target
    intervals file, --R reference genome, --log GATK log file.  Exits with
    status 1 if any step fails.
    """
    GATK_DIR = "/home/ubuntu/ext_tools/GenomeAnalysisTK-1.6-13-g91f02df"

    descr = "GATKIndelRealigner.py: version 1.0. kick a GATK Indel Realigner command. \n"
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('-I', '--I', required=True, help='input file')
    parser.add_argument('-O', '--O', required=True, help='output file')
    parser.add_argument('-T', '--T', required=True, help='target intervals file')
    parser.add_argument('-R', '--R', required=True, help='reference genome file')
    parser.add_argument('-log', '--log', required=True, help='log file')
    args = parser.parse_args()

    try:
        # Inputs are accessed through links with fixed names in the working
        # directory; presumably GATK/samtools rely on the ".bam" and
        # ".intervals" extensions -- TODO confirm.
        gatk_filename = _link_into_cwd(args.I, "gatk_file.bam")
        intervals_filename = _link_into_cwd(args.T, "intervals_file.intervals")

        _run(["samtools", "index", gatk_filename])

        # NOTE(review): -Xmx22g differs from the -Xmx2g used by the sibling
        # wrapper scripts -- confirm the larger heap is intentional.
        _run([
            "java", "-Xmx22g",
            "-jar", GATK_DIR + "/GenomeAnalysisTK.jar",
            "-T", "IndelRealigner",
            "-R", args.R,
            "-I", gatk_filename,
            "-targetIntervals", intervals_filename,
            "-log", args.log,
            "-o", args.O,
        ])
    except Exception:
        # Top-level boundary: report on stderr and fail loudly instead of
        # printing the traceback to stdout and exiting 0.
        sys.stderr.write(traceback.format_exc())
        sys.exit(1)


if __name__ == "__main__":
    __main__()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Index a BAM file with samtools and run GATK RealignerTargetCreator on it.

Origin: b/GATKRealignerTargetCreator.py, added in changeset 0:5bad325a92d7.

Each external command is echoed to stdout and then executed.  On any
failure the traceback is written to stderr and the script exits with
status 1, so the caller can tell that the job did not succeed.
"""

import argparse
import optparse
import os
import shutil
import subprocess
import sys
import tempfile
import traceback


def _run(argv):
    """Echo and execute one external command given as a list of strings.

    The command is executed directly (no shell), so file names containing
    spaces or shell metacharacters are passed through unchanged.

    Raises:
        RuntimeError: if the command exits with a non-zero status.
        OSError: if the executable cannot be started.
    """
    # print() with one %-formatted argument behaves the same on Python 2 and 3.
    print("The cmd is %s" % " ".join(argv))
    # Flush so our echo appears before the child's own output on the same fd.
    sys.stdout.flush()
    rtncode = subprocess.call(argv)
    if rtncode != 0:
        raise RuntimeError(
            "command exited with status %d: %s" % (rtncode, " ".join(argv))
        )


def _link_into_cwd(source, link_name):
    """Symlink *source* as *link_name* in the current directory; return the link path.

    A stale symlink of the same name is replaced.  A regular file of that
    name is left alone, in which case os.symlink raises OSError as before.
    """
    link_path = os.path.join(os.getcwd(), link_name)
    if os.path.islink(link_path):
        os.unlink(link_path)
    os.symlink(source, link_path)
    return link_path


def __main__():
    """Parse the command line, index the input BAM and run RealignerTargetCreator.

    Options (all required): --I input BAM, --O output intervals file,
    --L bed file, --R reference genome, --nt thread count passed to GATK,
    --log GATK log file.  Exits with status 1 if any step fails.
    """
    descr = "GATKRealignerTargetCreator.py: version 1.0. kick a GATK Realigner Target Creator command. \n"
    parser = argparse.ArgumentParser(description=descr)
    parser.add_argument('-I', '--I', required=True, help='input file')
    parser.add_argument('-O', '--O', required=True, help='output file')
    parser.add_argument('-L', '--L', required=True, help='bed file')
    parser.add_argument('-R', '--R', required=True, help='reference genome file')
    parser.add_argument('-nt', '--nt', required=True, help='process count')
    parser.add_argument('-log', '--log', required=True, help='log')
    args = parser.parse_args()

    try:
        # The BAM is indexed through a link in the working directory;
        # presumably so the index file is created next to the link rather
        # than next to the original input -- TODO confirm.
        gatk_filename = _link_into_cwd(args.I, "gatk_file.bam")

        _run(["samtools", "index", gatk_filename])

        # NOTE(review): the jar is referenced by bare file name, so it is
        # resolved relative to the current working directory -- confirm how
        # the deployment makes GenomeAnalysisTK.jar available there.
        _run([
            "java", "-Xmx2g", "-jar", "GenomeAnalysisTK.jar",
            "-T", "RealignerTargetCreator",
            "-R", args.R,
            "-nt", args.nt,
            "-I", gatk_filename,
            "-L", args.L,
            "-log", args.log,
            "-o", args.O,
        ])
    except Exception:
        # Top-level boundary: report on stderr and fail loudly instead of
        # printing the traceback to stdout and exiting 0.
        sys.stderr.write(traceback.format_exc())
        sys.exit(1)


if __name__ == "__main__":
    __main__()
<!-- Origin: b/GATKRealignerTargetCreator.xml, added in changeset 0:5bad325a92d7. -->
<!-- Galaxy tool definition that hands a BAM dataset, a bed file, a reference
     genome and a thread count to GATKRealignerTargetCreator.py. -->
<tool name="GATK Realigner Target Creator" id="GATK_realigner_target_creator" version="1.6.13">
    <description/>
    <!-- samtools is declared because the wrapper script runs "samtools index". -->
    <requirements>
        <requirement version="1.6.13" type="package">gatk</requirement>
        <requirement version="0.1.19" type="package">samtools</requirement>
    </requirements>
    <!-- Each option below maps one-to-one onto an argparse option of the script. -->
    <command interpreter="python">
        GATKRealignerTargetCreator.py
        --I $input
        --O $output
        --L $db_beds
        --R $db_refgenomes
        --nt $nt
        --log $log
    </command>
    <inputs>
        <param name="input" type="data" format="bam" label="Input file"/>
        <param name="nt" type="integer" value="0" label="processes num [-nt]" help=""/>
        <!-- Choices are read from the gatk_beds data table. -->
        <param name="db_beds" type="select" label="bed file [-L]">
            <options from_data_table="gatk_beds"/>
        </param>
        <!-- Choices are read from the gatk_refgenomes data table. -->
        <param name="db_refgenomes" type="select" label="reference genome [-R]">
            <options from_data_table="gatk_refgenomes"/>
        </param>
    </inputs>
    <outputs>
        <data name="output" format="gatk_interval"/>
        <data name="log" format="txt"/>
    </outputs>
    <help>
    </help>
</tool>
<!-- Origin: b/GATKSelectVariants.xml, added in changeset 0:5bad325a92d7. -->
<!-- Galaxy tool definition that hands a VCF dataset, a reference genome, a
     thread count and a select type (SNP or INDEL) to GATKSelectVariants.py. -->
<!-- NOTE(review): GATKSelectVariants.py does not appear in this changeset's
     file list; confirm the wrapper script is provided elsewhere. -->
<tool name="GATK Select Variants" id="GATK_select_variants" version="1.6.13">
    <description/>
    <requirements>
        <requirement version="1.6.13" type="package">gatk</requirement>
    </requirements>
    <command interpreter="python">
        GATKSelectVariants.py
        --I $input
        --O $output
        --R $db_refgenomes
        --nt $nt
        --st $select_type
        --log $log
    </command>
    <inputs>
        <param name="input" type="data" format="vcf" label="Input file"/>
        <!-- Choices are read from the gatk_refgenomes data table. -->
        <param name="db_refgenomes" type="select" label="Reference genome [-R]">
            <options from_data_table="gatk_refgenomes"/>
        </param>
        <!-- Fixed list of choices; the chosen value is passed as -st. -->
        <param name="select_type" type="select" label="select type [-selectType]">
            <option value="SNP">SNP</option>
            <option value="INDEL">INDEL</option>
        </param>
        <param name="nt" type="integer" value="0" label="processes num [-nt]" help=""/>
    </inputs>
    <outputs>
        <data name="output" format="vcf"/>
        <data name="log" format="txt"/>
    </outputs>
    <help>
    </help>
</tool>
<!-- Origin: b/GATKTableRecalibration.xml, added in changeset 0:5bad325a92d7. -->
<!-- Galaxy tool definition that hands a BAM dataset, a reference genome and
     a recalibration csv dataset to GATKTableRecalibration.py. -->
<!-- NOTE(review): GATKTableRecalibration.py does not appear in this
     changeset's file list; confirm the wrapper script is provided elsewhere. -->
<tool name="GATK Table Recalibration" id="GATK_table_recalibration" version="1.6.13">
    <description/>
    <requirements>
        <requirement version="1.6.13" type="package">gatk</requirement>
    </requirements>
    <command interpreter="python">
        GATKTableRecalibration.py
        --I $input
        --O $output
        --R $db_refgenomes
        --C $recal_csv
        --log $log
    </command>
    <inputs>
        <param name="input" type="data" format="bam" label="Input file"/>
        <!-- Choices are read from the gatk_refgenomes data table. -->
        <param name="db_refgenomes" type="select" label="Reference genome [-R]">
            <options from_data_table="gatk_refgenomes"/>
        </param>
        <param name="recal_csv" type="data" format="csv" label="Recal file [-recalFile]"/>
    </inputs>
    <outputs>
        <data name="output" format="bam"/>
        <data name="log" format="txt"/>
    </outputs>
    <help>
    </help>
</tool>
<!-- Origin: b/GATKUnifiedGenotyper.xml, added in changeset 0:5bad325a92d7. -->
<!-- Galaxy tool definition that hands a BAM dataset, a reference genome, a
     bed file, a dbSNP file, a thread count and two confidence thresholds to
     GATKUnifiedGenotyper.py. -->
<!-- NOTE(review): GATKUnifiedGenotyper.py does not appear in this
     changeset's file list; confirm the wrapper script is provided elsewhere. -->
<tool name="GATK Unified Genotyper" id="GATK_unified_genotyper" version="1.6.13">
    <description/>
    <requirements>
        <requirement version="1.6.13" type="package">gatk</requirement>
    </requirements>
    <command interpreter="python">
        GATKUnifiedGenotyper.py
        --I $input
        --O $output
        --R $db_refgenomes
        --L $db_beds
        --nt $nt
        --stc $stc
        --sec $sec
        --log $log
        --D $db_snps
    </command>
    <inputs>
        <param name="input" type="data" format="bam" label="Input file"/>
        <!-- Choices are read from the gatk_refgenomes data table. -->
        <param name="db_refgenomes" type="select" label="Reference genome [-R]">
            <options from_data_table="gatk_refgenomes"/>
        </param>
        <!-- Choices are read from the gatk_beds data table. -->
        <param name="db_beds" type="select" label="Bed [-L]">
            <options from_data_table="gatk_beds"/>
        </param>
        <param name="nt" type="integer" value="0" label="processes num [-nt]"/>
        <param name="stc" type="float" value="50.0" label="stand call conf [-stand_call_conf]"/>
        <param name="sec" type="float" value="10.0" label="stand emit conf [-stand_emit_conf]"/>
        <!-- Choices are read from the gatk_snps data table. -->
        <param name="db_snps" type="select" label="dbSNP vcf file [-D]">
            <options from_data_table="gatk_snps"/>
        </param>
    </inputs>
    <outputs>
        <data name="output" format="vcf"/>
        <data name="log" format="txt"/>
    </outputs>
    <help>
    </help>
</tool>
<?xml version="1.0"?>
<!-- Origin: b/tool_dependencies.xml, added in changeset 0:5bad325a92d7. -->
<!-- Maps the gatk and samtools package requirements used by the tool
     definitions onto the tool shed repositories that provide them. -->
<tool_dependency>
    <!-- NOTE(review): this package is pulled from the test tool shed over
         http, while samtools below comes from the main tool shed over
         https; confirm that this is intended. -->
    <package version="1.6.13" name="gatk">
        <repository
            toolshed="http://testtoolshed.g2.bx.psu.edu"
            owner="pitagora"
            name="package_gatk_1_6_13"
            changeset_revision="8e93fe5935fd"
            prior_installation_required="False"/>
    </package>
    <package version="0.1.19" name="samtools">
        <repository
            toolshed="https://toolshed.g2.bx.psu.edu"
            owner="devteam"
            name="package_samtools_0_1_19"
            changeset_revision="95d2c4aefb5f"
            prior_installation_required="False"/>
    </package>
</tool_dependency>