changeset 5:04a1efe6021e default tip

commit
author pitagora <ryota.yamanaka@riken.jp>
date Thu, 16 Apr 2015 11:50:32 +0900
parents 2f7daeeaf4aa
children
files GATKCombineVariants.py GATKCombineVariants.xml GATKCountCovariate.py GATKCountCovariate.xml GATKIndelRealigner.py GATKSelectVariants.xml GATKTableRecalibration.xml GATKUnifiedGenotyper.xml
diffstat 8 files changed, 222 insertions(+), 213 deletions(-) [+]
line wrap: on
line diff
--- a/GATKCombineVariants.py	Thu Apr 16 10:53:16 2015 +0900
+++ b/GATKCombineVariants.py	Thu Apr 16 11:50:32 2015 +0900
@@ -9,13 +9,14 @@
 	try:
 		descr = "GATKCombineVariants.py: version 1.0. \n"
 		parser = argparse.ArgumentParser(description=descr)
+		parser.add_argument( '-J', '--J', help='java jar path' )
 		parser.add_argument( '-S', '--S', help='snp vcf file' )
 		parser.add_argument( '-I', '--I', help='indel vcf file' )
 		parser.add_argument( '-R', '--R', help='reference genome file' )
 		parser.add_argument( '-O', '--O', help='output file' )
 		args = parser.parse_args()
 		
-		cmd = "java -Xmx2g -jar GenomeAnalysisTK.jar -R " + args.R + " -T CombineVariants -V:SNV,vcf " + args.S + " -V:INDEL,vcf " + args.I + " -o " + args.O
+		cmd = "java -Xmx2g -jar " + args.J + "/GenomeAnalysisTK.jar -R " + args.R + " -T CombineVariants -V:SNV,vcf " + args.S + " -V:INDEL,vcf " + args.I + " -o " + args.O
 		print "The cmd is %s" % cmd
 		
 		proc = subprocess.Popen( args=cmd, shell=True )
--- a/GATKCombineVariants.xml	Thu Apr 16 10:53:16 2015 +0900
+++ b/GATKCombineVariants.xml	Thu Apr 16 11:50:32 2015 +0900
@@ -1,25 +1,26 @@
 <tool id="GATK_combine_variants" name="GATK Combine Variants" version="1.6.13">
-	<description></description>
-	<requirements>
-	    <requirement type="package" version="1.6.13">gatk</requirement>
-	</requirements>
-	<command interpreter="python">
-		GATKCombineVariants.py
-			--S $snp
-			--I $indel
-			--R $db_refgenomes
-			--O $output
-	</command>
-	<inputs>
-		<param type="data" name="snp" format="vcf" label="SNP vcf file" />
-		<param type="data" name="indel" format="vcf" label="INDEL vcf file" />
-		<param type="select" name="db_refgenomes" label="Reference genome [-R]">
-			<options from_data_table="gatk_refgenomes" />
-		</param>
-	</inputs>
-	<outputs>
-		<data format="vcf" name="output" />
-	</outputs>
-	<help>
-	</help>
+    <description></description>
+    <requirements>
+        <requirement type="package" version="1.6.13">gatk</requirement>
+    </requirements>
+    <command interpreter="python">
+        GATKCombineVariants.py
+            --J \$JAVA_JAR_PATH
+            --S $snp
+            --I $indel
+            --R $db_refgenomes
+            --O $output
+    </command>
+    <inputs>
+        <param type="data" name="snp" format="vcf" label="SNP vcf file" />
+        <param type="data" name="indel" format="vcf" label="INDEL vcf file" />
+        <param type="select" name="db_refgenomes" label="Reference genome [-R]">
+            <options from_data_table="gatk_refgenomes" />
+        </param>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" />
+    </outputs>
+    <help>
+    </help>
 </tool>
--- a/GATKCountCovariate.py	Thu Apr 16 10:53:16 2015 +0900
+++ b/GATKCountCovariate.py	Thu Apr 16 11:50:32 2015 +0900
@@ -6,40 +6,41 @@
 import traceback
 
 def __main__():
-	try:
-		descr = "GATKCountCovariate.py: version 1.0. kick a GATK Count Covariate command. \n"
-		parser = argparse.ArgumentParser(description=descr)
-		parser.add_argument( '-I', '--I', help='input file' )
-		parser.add_argument( '-O', '--O', help='output file' )
-		parser.add_argument( '-R', '--R', help='reference genome file' )
-		parser.add_argument( '-KN', '--KN', help='known sites' )
-		parser.add_argument( '-nt', '--nt', help='process count' )
-		parser.add_argument( '-log', '--log', help='log file' )
-		args = parser.parse_args()
-		
-		target_dir = os.getcwd()
-		gatk_filename = os.path.join( target_dir, "gatk_file.bam" )
-		os.symlink( args.I, gatk_filename )
-		
-		cmd = "samtools index " + gatk_filename
-		print "The cmd is %s " % cmd
-		
-		proc = subprocess.Popen( args=cmd, shell=True )
-		rtncode = proc.wait()
-		
-		if rtncode != 0:
-			raise Exception
-		
-		cmd = "java -Xmx2g -jar GenomeAnalysisTK.jar -T CountCovariates -I " + gatk_filename + " -R " + args.R + " -knownSites:dbsnp,VCF " + args.KN + " -nt " + args.nt + " -l INFO -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate -log " + args.log + " -recalFile " + args.O
-		print "The cmd is %s" % cmd
-		
-		proc = subprocess.Popen( args=cmd, shell=True )
-		rtncode = proc.wait()
-		
-		if rtncode != 0:
-			raise Exception
-	except Exception:
-		err = traceback.format_exc(None)
-		print err
+    try:
+        descr = "GATKCountCovariate.py: version 1.0. kick a GATK Count Covariate command. \n"
+        parser = argparse.ArgumentParser(description=descr)
+        parser.add_argument( '-J', '--J', help='java jar path' )
+        parser.add_argument( '-I', '--I', help='input file' )
+        parser.add_argument( '-O', '--O', help='output file' )
+        parser.add_argument( '-R', '--R', help='reference genome file' )
+        parser.add_argument( '-KN', '--KN', help='known sites' )
+        parser.add_argument( '-nt', '--nt', help='process count' )
+        parser.add_argument( '-log', '--log', help='log file' )
+        args = parser.parse_args()
+        
+        target_dir = os.getcwd()
+        gatk_filename = os.path.join( target_dir, "gatk_file.bam" )
+        os.symlink( args.I, gatk_filename )
+        
+        cmd = "samtools index " + gatk_filename
+        print "The cmd is %s " % cmd
+        
+        proc = subprocess.Popen( args=cmd, shell=True )
+        rtncode = proc.wait()
+        
+        if rtncode != 0:
+            raise Exception
+        
+        cmd = "java -Xmx2g -jar " + args.J + "/GenomeAnalysisTK.jar -T CountCovariates -I " + gatk_filename + " -R " + args.R + " -knownSites:dbsnp,VCF " + args.KN + " -nt " + args.nt + " -l INFO -cov ReadGroupCovariate -cov QualityScoreCovariate -cov CycleCovariate -cov DinucCovariate -log " + args.log + " -recalFile " + args.O
+        print "The cmd is %s" % cmd
+        
+        proc = subprocess.Popen( args=cmd, shell=True )
+        rtncode = proc.wait()
+        
+        if rtncode != 0:
+            raise Exception
+    except Exception:
+        err = traceback.format_exc(None)
+        print err
 
 if __name__=="__main__": __main__()
--- a/GATKCountCovariate.xml	Thu Apr 16 10:53:16 2015 +0900
+++ b/GATKCountCovariate.xml	Thu Apr 16 11:50:32 2015 +0900
@@ -1,31 +1,33 @@
 <tool id="GATK_count_covariate" name="GATK Count Covariate" version="1.6.13">
-	<description></description>
-	<requirements>
-	        <requirement type="package" version="1.6.13">gatk</requirement>
-	</requirements>
-	<command interpreter="python">
-		GATKCountCovariate.py
-			--I $input
-			--O $output
-			--R $db_refgenomes
-			--KN $db_snps
-			--nt $nt
-			--log $log
-	</command>
-	<inputs>
-		<param type="data" name="input" format="bam" label="Input file" />
-		<param type="select" name="db_refgenomes" label="Reference genome [-R]">
-			<options from_data_table="gatk_refgenomes" />
-		</param>
-		<param type="select" name="db_snps" label="Known sites [-knownSites]">
-			<options from_data_table="gatk_snps" />
-		</param>
-		<param type="integer" name="nt" value="0" label="processes num [-nt]" help="" />
-	</inputs>
-	<outputs>
-		<data format="csv" name="output" />
-		<data format="txt" name="log" />
-	</outputs>
-	<help>
-	</help>
+    <description></description>
+    <requirements>
+        <requirement type="package" version="1.6.13">gatk</requirement>
+        <requirement type="package" version="0.1.19">samtools</requirement>
+    </requirements>
+    <command interpreter="python">
+        GATKCountCovariate.py
+            --J \$JAVA_JAR_PATH
+            --I $input
+            --O $output
+            --R $db_refgenomes
+            --KN $db_snps
+            --nt $nt
+            --log $log
+    </command>
+    <inputs>
+        <param type="data" name="input" format="bam" label="Input file" />
+        <param type="select" name="db_refgenomes" label="Reference genome [-R]">
+            <options from_data_table="gatk_refgenomes" />
+        </param>
+        <param type="select" name="db_snps" label="Known sites [-knownSites]">
+            <options from_data_table="gatk_snps" />
+        </param>
+        <param type="integer" name="nt" value="0" label="processes num [-nt]" help="" />
+    </inputs>
+    <outputs>
+        <data format="csv" name="output" />
+        <data format="txt" name="log" />
+    </outputs>
+    <help>
+    </help>
 </tool>
--- a/GATKIndelRealigner.py	Thu Apr 16 10:53:16 2015 +0900
+++ b/GATKIndelRealigner.py	Thu Apr 16 11:50:32 2015 +0900
@@ -6,42 +6,43 @@
 import traceback
 
 def __main__():
-	try:
-		GATK_DIR = "/home/ubuntu/ext_tools/GenomeAnalysisTK-1.6-13-g91f02df"
-		
-		descr = "GATKIndelRealigner.py: version 1.0. kick a GATK Indel Realigner command. \n"
-		parser = argparse.ArgumentParser(description=descr)
-		parser.add_argument( '-I', '--I', help='input file' )
-		parser.add_argument( '-O', '--O', help='output file' )
-		parser.add_argument( '-T', '--T', help='target intervals file' )
-		parser.add_argument( '-R', '--R', help='reference genome file' )
-		parser.add_argument( '-log', '--log', help='log file' )
-		args = parser.parse_args()
-		
-		target_dir = os.getcwd()
-		gatk_filename = os.path.join( target_dir, "gatk_file.bam" )
-		os.symlink( args.I, gatk_filename )
-		
-		intervals_filename = os.path.join( target_dir, "intervals_file.intervals" )
-		os.symlink( args.T, intervals_filename )
-		
-		cmd = "samtools index " + gatk_filename
-		print "The cmd is %s" % cmd
-		
-		proc = subprocess.Popen( args=cmd, shell=True )
-		rtncode = proc.wait()
-		if rtncode != 0:
-			raise Exception
-		
-		cmd = "java -Xmx22g -jar " + GATK_DIR + "/GenomeAnalysisTK.jar -T IndelRealigner -R " + args.R + " -I " + gatk_filename + " -targetIntervals " + intervals_filename + " -log " + args.log + " -o " + args.O 
-		print "The cmd is %s" % cmd
-		
-		proc = subprocess.Popen( args=cmd, shell=True )
-		rtncode = proc.wait()
-		if rtncode != 0:
-			raise Exception
-	except Exception:
-		err = traceback.format_exc(None)
-		print err
+    try:
+        GATK_DIR = "/home/ubuntu/ext_tools/GenomeAnalysisTK-1.6-13-g91f02df"
+        
+        descr = "GATKIndelRealigner.py: version 1.0. kick a GATK Indel Realigner command. \n"
+        parser = argparse.ArgumentParser(description=descr)
+        parser.add_argument( '-J', '--J', help='java jar path' )
+        parser.add_argument( '-I', '--I', help='input file' )
+        parser.add_argument( '-O', '--O', help='output file' )
+        parser.add_argument( '-T', '--T', help='target intervals file' )
+        parser.add_argument( '-R', '--R', help='reference genome file' )
+        parser.add_argument( '-log', '--log', help='log file' )
+        args = parser.parse_args()
+        
+        target_dir = os.getcwd()
+        gatk_filename = os.path.join( target_dir, "gatk_file.bam" )
+        os.symlink( args.I, gatk_filename )
+        
+        intervals_filename = os.path.join( target_dir, "intervals_file.intervals" )
+        os.symlink( args.T, intervals_filename )
+        
+        cmd = "samtools index " + gatk_filename
+        print "The cmd is %s" % cmd
+        
+        proc = subprocess.Popen( args=cmd, shell=True )
+        rtncode = proc.wait()
+        if rtncode != 0:
+            raise Exception
+        
+        cmd = "java -Xmx2g -jar " + args.J + "/GenomeAnalysisTK.jar -T IndelRealigner -R " + args.R + " -I " + gatk_filename + " -targetIntervals " + intervals_filename + " -log " + args.log + " -o " + args.O 
+        print "The cmd is %s" % cmd
+        
+        proc = subprocess.Popen( args=cmd, shell=True )
+        rtncode = proc.wait()
+        if rtncode != 0:
+            raise Exception
+    except Exception:
+        err = traceback.format_exc(None)
+        print err
 
 if __name__=="__main__": __main__()
--- a/GATKSelectVariants.xml	Thu Apr 16 10:53:16 2015 +0900
+++ b/GATKSelectVariants.xml	Thu Apr 16 11:50:32 2015 +0900
@@ -1,32 +1,33 @@
 <tool id="GATK_select_variants" name="GATK Select Variants" version="1.6.13">
-	<description></description>
-	<requirements>
-	        <requirement type="package" version="1.6.13">gatk</requirement>
-	</requirements>
-	<command interpreter="python">
-		GATKSelectVariants.py
-			--I $input
-			--O $output
-			--R $db_refgenomes
-			--nt $nt
-			--st $select_type
-			--log $log
-	</command>
-	<inputs>
-		<param type="data" name="input" format="vcf" label="Input file" />
-		<param type="select" name="db_refgenomes" label="Reference genome [-R]">
-			<options from_data_table="gatk_refgenomes" />
-		</param>
+    <description></description>
+    <requirements>
+            <requirement type="package" version="1.6.13">gatk</requirement>
+    </requirements>
+    <command interpreter="python">
+        GATKSelectVariants.py
+            --J \$JAVA_JAR_PATH
+            --I $input
+            --O $output
+            --R $db_refgenomes
+            --nt $nt
+            --st $select_type
+            --log $log
+    </command>
+    <inputs>
+        <param type="data" name="input" format="vcf" label="Input file" />
+        <param type="select" name="db_refgenomes" label="Reference genome [-R]">
+            <options from_data_table="gatk_refgenomes" />
+        </param>
                 <param type="select" name="select_type" label="select type [-selectType]">
                         <option value="SNP">SNP</option>
                         <option value="INDEL">INDEL</option>
                 </param>
-		<param type="integer" name="nt" value="0" label="processes num [-nt]" help="" />
-	</inputs>
-	<outputs>
-		<data format="vcf" name="output" />
-		<data format="txt" name="log" />
-	</outputs>
-	<help>
-	</help>
+        <param type="integer" name="nt" value="0" label="processes num [-nt]" help="" />
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" />
+        <data format="txt" name="log" />
+    </outputs>
+    <help>
+    </help>
 </tool>
--- a/GATKTableRecalibration.xml	Thu Apr 16 10:53:16 2015 +0900
+++ b/GATKTableRecalibration.xml	Thu Apr 16 11:50:32 2015 +0900
@@ -1,27 +1,28 @@
 <tool id="GATK_table_recalibration" name="GATK Table Recalibration" version="1.6.13">
-	<description></description>
-	<requirements>
-	        <requirement type="package" version="1.6.13">gatk</requirement>
-	</requirements>
-	<command interpreter="python">
-		GATKTableRecalibration.py
-			--I $input
-			--O $output
-			--R $db_refgenomes
-			--C $recal_csv
-			--log $log
-	</command>
-	<inputs>
-		<param type="data" name="input" format="bam" label="Input file" />
-		<param type="select" name="db_refgenomes" label="Reference genome [-R]">
-			<options from_data_table="gatk_refgenomes" />
-		</param>
-		<param type="data" name="recal_csv" format="csv" label="Recal file [-recalFile]" />
-	</inputs>
-	<outputs>
-		<data format="bam" name="output" />
-		<data format="txt" name="log" />
-	</outputs>
-	<help>
-	</help>
+    <description></description>
+    <requirements>
+            <requirement type="package" version="1.6.13">gatk</requirement>
+    </requirements>
+    <command interpreter="python">
+        GATKTableRecalibration.py
+            --J \$JAVA_JAR_PATH
+            --I $input
+            --O $output
+            --R $db_refgenomes
+            --C $recal_csv
+            --log $log
+    </command>
+    <inputs>
+        <param type="data" name="input" format="bam" label="Input file" />
+        <param type="select" name="db_refgenomes" label="Reference genome [-R]">
+            <options from_data_table="gatk_refgenomes" />
+        </param>
+        <param type="data" name="recal_csv" format="csv" label="Recal file [-recalFile]" />
+    </inputs>
+    <outputs>
+        <data format="bam" name="output" />
+        <data format="txt" name="log" />
+    </outputs>
+    <help>
+    </help>
 </tool>
--- a/GATKUnifiedGenotyper.xml	Thu Apr 16 10:53:16 2015 +0900
+++ b/GATKUnifiedGenotyper.xml	Thu Apr 16 11:50:32 2015 +0900
@@ -1,39 +1,40 @@
 <tool id="GATK_unified_genotyper" name="GATK Unified Genotyper" version="1.6.13">
-	<description></description>
-	<requirements>
-	        <requirement type="package" version="1.6.13">gatk</requirement>
-	</requirements>
-	<command interpreter="python">
-		GATKUnifiedGenotyper.py
-			--I $input
-			--O $output
-			--R $db_refgenomes
-			--L $db_beds
-			--nt $nt
-			--stc $stc
-			--sec $sec
-			--log $log
-			--D $db_snps
-	</command>
-	<inputs>
-		<param type="data" name="input" format="bam" label="Input file" />
-		<param type="select" name="db_refgenomes" label="Reference genome [-R]">
-			<options from_data_table="gatk_refgenomes" />
-		</param>
-		<param type="select" name="db_beds" label="Bed [-L]">
-			<options from_data_table="gatk_beds" />
-		</param>
-		<param type="integer" name="nt" value="0" label="processes num [-nt]" />
-		<param type="float" name="stc" value="50.0" label="stand call conf [-stand_call_conf]" />
-		<param type="float" name="sec" value="10.0" label="stand emit conf [-stand_emit_conf]" />
-		<param type="select" name="db_snps" label="dbSNP vcf file [-D]">
-			<options from_data_table="gatk_snps" />
-		</param>
-	</inputs>
-	<outputs>
-		<data format="vcf" name="output" />
-		<data format="txt" name="log" />
-	</outputs>
-	<help>
-	</help>
+    <description></description>
+    <requirements>
+            <requirement type="package" version="1.6.13">gatk</requirement>
+    </requirements>
+    <command interpreter="python">
+        GATKUnifiedGenotyper.py
+            --J \$JAVA_JAR_PATH
+            --I $input
+            --O $output
+            --R $db_refgenomes
+            --L $db_beds
+            --nt $nt
+            --stc $stc
+            --sec $sec
+            --log $log
+            --D $db_snps
+    </command>
+    <inputs>
+        <param type="data" name="input" format="bam" label="Input file" />
+        <param type="select" name="db_refgenomes" label="Reference genome [-R]">
+            <options from_data_table="gatk_refgenomes" />
+        </param>
+        <param type="select" name="db_beds" label="Bed [-L]">
+            <options from_data_table="gatk_beds" />
+        </param>
+        <param type="integer" name="nt" value="0" label="processes num [-nt]" />
+        <param type="float" name="stc" value="50.0" label="stand call conf [-stand_call_conf]" />
+        <param type="float" name="sec" value="10.0" label="stand emit conf [-stand_emit_conf]" />
+        <param type="select" name="db_snps" label="dbSNP vcf file [-D]">
+            <options from_data_table="gatk_snps" />
+        </param>
+    </inputs>
+    <outputs>
+        <data format="vcf" name="output" />
+        <data format="txt" name="log" />
+    </outputs>
+    <help>
+    </help>
 </tool>