changeset 0:5bad325a92d7

commit
author pitagora <ryota.yamanaka@riken.jp>
date Wed, 15 Apr 2015 18:41:05 +0900
parents
children 9b55ddff33e2
files GATKCombineVariants.py GATKCombineVariants.xml GATKCountCovariate.py GATKCountCovariate.xml GATKIndelRealigner.py GATKRealignerTargetCreator.py GATKRealignerTargetCreator.xml GATKSelectVariants.xml GATKTableRecalibration.xml GATKUnifiedGenotyper.xml tool_dependencies.xml
diffstat 11 files changed, 363 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GATKCombineVariants.py	Wed Apr 15 18:41:05 2015 +0900
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import optparse, os, shutil, subprocess, sys, tempfile
+import argparse
+import traceback
+
+def _run( cmd ):
+	"""Run cmd (an argument list, no shell) and raise if it exits non-zero."""
+	# Flush first so the echoed command precedes the child's own output.
+	sys.stdout.flush()
+	rtncode = subprocess.call( cmd )
+	if rtncode != 0:
+		raise Exception( "%s exited with return code %d" % ( cmd[0], rtncode ) )
+
+def __main__():
+	"""Merge a SNP VCF and an INDEL VCF with GATK CombineVariants.
+
+	Command-line options (all required):
+	  -S/--S  SNP VCF file, passed to GATK as -V:SNV,vcf
+	  -I/--I  INDEL VCF file, passed to GATK as -V:INDEL,vcf
+	  -R/--R  reference genome file
+	  -O/--O  output file
+
+	On failure the traceback is written to stderr and the process exits with
+	status 1.
+	"""
+	try:
+		descr = "GATKCombineVariants.py: version 1.0. \n"
+		parser = argparse.ArgumentParser(description=descr)
+		parser.add_argument( '-S', '--S', required=True, help='snp vcf file' )
+		parser.add_argument( '-I', '--I', required=True, help='indel vcf file' )
+		parser.add_argument( '-R', '--R', required=True, help='reference genome file' )
+		parser.add_argument( '-O', '--O', required=True, help='output file' )
+		args = parser.parse_args()
+
+		# Argument list instead of a shell string: paths containing spaces or
+		# shell metacharacters reach GATK unchanged.
+		cmd = [ "java", "-Xmx2g", "-jar", "GenomeAnalysisTK.jar",
+			"-R", args.R, "-T", "CombineVariants",
+			"-V:SNV,vcf", args.S, "-V:INDEL,vcf", args.I, "-o", args.O ]
+		print( "The cmd is %s" % " ".join( cmd ) )
+		_run( cmd )
+	except Exception:
+		# Fail loudly: printing to stdout and returning 0 would make a failed
+		# run look successful to the caller.
+		sys.stderr.write( traceback.format_exc() )
+		sys.exit( 1 )
+
+if __name__=="__main__": __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GATKCombineVariants.xml	Wed Apr 15 18:41:05 2015 +0900
@@ -0,0 +1,25 @@
+<tool id="GATK_combine_variants" name="GATK Combine Variants" version="1.6.13"> <!-- Galaxy tool wrapper: passes one SNP VCF, one INDEL VCF and a reference genome to GATKCombineVariants.py, which writes a single VCF output. -->
+	<description></description>
+	<requirements>
+	    <requirement type="package" version="1.6.13">gatk</requirement> <!-- corresponds to the gatk 1.6.13 package declared in tool_dependencies.xml -->
+	</requirements>
+	<command interpreter="python">
+		GATKCombineVariants.py
+			--S $snp
+			--I $indel
+			--R $db_refgenomes
+			--O $output
+	</command>
+	<inputs>
+		<param type="data" name="snp" format="vcf" label="SNP vcf file" />
+		<param type="data" name="indel" format="vcf" label="INDEL vcf file" />
+		<param type="select" name="db_refgenomes" label="Reference genome [-R]"> <!-- choices are read from the "gatk_refgenomes" data table -->
+			<options from_data_table="gatk_refgenomes" />
+		</param>
+	</inputs>
+	<outputs>
+		<data format="vcf" name="output" />
+	</outputs>
+	<help>
+	</help> <!-- TODO: help text is empty -->
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GATKCountCovariate.py	Wed Apr 15 18:41:05 2015 +0900
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import optparse, os, shutil, subprocess, sys, tempfile
+import argparse
+import traceback
+
+def _run( cmd ):
+	"""Run cmd (an argument list, no shell) and raise if it exits non-zero."""
+	# Flush first so the echoed command precedes the child's own output.
+	sys.stdout.flush()
+	rtncode = subprocess.call( cmd )
+	if rtncode != 0:
+		raise Exception( "%s exited with return code %d" % ( cmd[0], rtncode ) )
+
+def __main__():
+	"""Index the input BAM with samtools, then run GATK CountCovariates on it.
+
+	Command-line options (all required):
+	  -I/--I      input file
+	  -O/--O      output file, passed to GATK as -recalFile
+	  -R/--R      reference genome file
+	  -KN/--KN    known sites, passed to GATK as -knownSites:dbsnp,VCF
+	  -nt/--nt    value passed to GATK as -nt
+	  -log/--log  log file, passed to GATK as -log
+
+	The input is symlinked to gatk_file.bam in the current working directory
+	and both commands operate on that link (presumably so the index is
+	written beside the link rather than beside the original - confirm).
+
+	On failure the traceback is written to stderr and the process exits with
+	status 1.
+	"""
+	try:
+		descr = "GATKCountCovariate.py: version 1.0. kick a GATK Count Covariate command. \n"
+		parser = argparse.ArgumentParser(description=descr)
+		parser.add_argument( '-I', '--I', required=True, help='input file' )
+		parser.add_argument( '-O', '--O', required=True, help='output file' )
+		parser.add_argument( '-R', '--R', required=True, help='reference genome file' )
+		parser.add_argument( '-KN', '--KN', required=True, help='known sites' )
+		parser.add_argument( '-nt', '--nt', required=True, help='process count' )
+		parser.add_argument( '-log', '--log', required=True, help='log file' )
+		args = parser.parse_args()
+
+		target_dir = os.getcwd()
+		gatk_filename = os.path.join( target_dir, "gatk_file.bam" )
+		os.symlink( args.I, gatk_filename )
+
+		# Argument lists instead of shell strings: paths containing spaces or
+		# shell metacharacters reach the tools unchanged.
+		cmd = [ "samtools", "index", gatk_filename ]
+		print( "The cmd is %s " % " ".join( cmd ) )
+		_run( cmd )
+
+		cmd = [ "java", "-Xmx2g", "-jar", "GenomeAnalysisTK.jar",
+			"-T", "CountCovariates", "-I", gatk_filename, "-R", args.R,
+			"-knownSites:dbsnp,VCF", args.KN, "-nt", args.nt, "-l", "INFO",
+			"-cov", "ReadGroupCovariate", "-cov", "QualityScoreCovariate",
+			"-cov", "CycleCovariate", "-cov", "DinucCovariate",
+			"-log", args.log, "-recalFile", args.O ]
+		print( "The cmd is %s" % " ".join( cmd ) )
+		_run( cmd )
+	except Exception:
+		# Fail loudly: printing to stdout and returning 0 would make a failed
+		# run look successful to the caller.
+		sys.stderr.write( traceback.format_exc() )
+		sys.exit( 1 )
+
+if __name__=="__main__": __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GATKCountCovariate.xml	Wed Apr 15 18:41:05 2015 +0900
@@ -0,0 +1,34 @@
+<tool id="GATK_count_covariate" name="GATK Count Covariate" version="1.6.13">
+	<!-- Galaxy tool wrapper: passes a BAM input, a reference genome, a known-sites entry and a thread count to GATKCountCovariate.py, which writes a csv output and a log. -->
+	<description></description>
+	<requirements>
+	        <requirement type="package" version="1.6.13">gatk</requirement>
+	        <!-- GATKCountCovariate.py runs "samtools index" on the input before calling GATK. -->
+	        <requirement type="package" version="0.1.19">samtools</requirement>
+	</requirements>
+	<command interpreter="python">
+		GATKCountCovariate.py
+			--I $input
+			--O $output
+			--R $db_refgenomes
+			--KN $db_snps
+			--nt $nt
+			--log $log
+	</command>
+	<inputs>
+		<param type="data" name="input" format="bam" label="Input file" />
+		<param type="select" name="db_refgenomes" label="Reference genome [-R]">
+			<options from_data_table="gatk_refgenomes" />
+		</param>
+		<param type="select" name="db_snps" label="Known sites [-knownSites]">
+			<options from_data_table="gatk_snps" />
+		</param>
+		<param type="integer" name="nt" value="0" label="processes num [-nt]" help="" /> <!-- NOTE(review): the default 0 is forwarded verbatim as "-nt 0"; confirm GATK accepts it -->
+	</inputs>
+	<outputs>
+		<data format="csv" name="output" />
+		<data format="txt" name="log" />
+	</outputs>
+	<help>
+	</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GATKIndelRealigner.py	Wed Apr 15 18:41:05 2015 +0900
@@ -0,0 +1,72 @@
+#!/home/ubuntu/galaxy-python/python
+# -*- coding: utf-8 -*-
+
+import optparse, os, shutil, subprocess, sys, tempfile
+import argparse
+import traceback
+
+def _run( cmd ):
+	"""Run cmd (an argument list, no shell) and raise if it exits non-zero."""
+	# Flush first so the echoed command precedes the child's own output.
+	sys.stdout.flush()
+	rtncode = subprocess.call( cmd )
+	if rtncode != 0:
+		raise Exception( "%s exited with return code %d" % ( cmd[0], rtncode ) )
+
+def __main__():
+	"""Index the input BAM with samtools, then run GATK IndelRealigner on it.
+
+	Command-line options (all required):
+	  -I/--I      input file
+	  -O/--O      output file, passed to GATK as -o
+	  -T/--T      target intervals file, passed to GATK as -targetIntervals
+	  -R/--R      reference genome file
+	  -log/--log  log file, passed to GATK as -log
+
+	The input and the intervals file are symlinked to gatk_file.bam and
+	intervals_file.intervals in the current working directory, and the
+	commands operate on those links.
+
+	On failure the traceback is written to stderr and the process exits with
+	status 1.
+	"""
+	try:
+		# NOTE(review): machine-specific path (like the interpreter in the shebang
+		# and the 22g heap below); the sibling wrappers use a bare jar name and 2g.
+		GATK_DIR = "/home/ubuntu/ext_tools/GenomeAnalysisTK-1.6-13-g91f02df"
+
+		descr = "GATKIndelRealigner.py: version 1.0. kick a GATK Indel Realigner command. \n"
+		parser = argparse.ArgumentParser(description=descr)
+		parser.add_argument( '-I', '--I', required=True, help='input file' )
+		parser.add_argument( '-O', '--O', required=True, help='output file' )
+		parser.add_argument( '-T', '--T', required=True, help='target intervals file' )
+		parser.add_argument( '-R', '--R', required=True, help='reference genome file' )
+		parser.add_argument( '-log', '--log', required=True, help='log file' )
+		args = parser.parse_args()
+
+		target_dir = os.getcwd()
+		gatk_filename = os.path.join( target_dir, "gatk_file.bam" )
+		os.symlink( args.I, gatk_filename )
+
+		intervals_filename = os.path.join( target_dir, "intervals_file.intervals" )
+		os.symlink( args.T, intervals_filename )
+
+		# Argument lists instead of shell strings: paths containing spaces or
+		# shell metacharacters reach the tools unchanged.
+		cmd = [ "samtools", "index", gatk_filename ]
+		print( "The cmd is %s" % " ".join( cmd ) )
+		_run( cmd )
+
+		cmd = [ "java", "-Xmx22g", "-jar", GATK_DIR + "/GenomeAnalysisTK.jar",
+			"-T", "IndelRealigner", "-R", args.R, "-I", gatk_filename,
+			"-targetIntervals", intervals_filename,
+			"-log", args.log, "-o", args.O ]
+		print( "The cmd is %s" % " ".join( cmd ) )
+		_run( cmd )
+	except Exception:
+		# Fail loudly: printing to stdout and returning 0 would make a failed
+		# run look successful to the caller.
+		sys.stderr.write( traceback.format_exc() )
+		sys.exit( 1 )
+
+if __name__=="__main__": __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GATKRealignerTargetCreator.py	Wed Apr 15 18:41:05 2015 +0900
@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import optparse, os, shutil, subprocess, sys, tempfile
+import argparse
+import traceback
+
+def _run( cmd ):
+	"""Run cmd (an argument list, no shell) and raise if it exits non-zero."""
+	# Flush first so the echoed command precedes the child's own output.
+	sys.stdout.flush()
+	rtncode = subprocess.call( cmd )
+	if rtncode != 0:
+		raise Exception( "%s exited with return code %d" % ( cmd[0], rtncode ) )
+
+def __main__():
+	"""Index the input BAM with samtools, then run GATK RealignerTargetCreator.
+
+	Command-line options (all required):
+	  -I/--I      input file
+	  -O/--O      output file, passed to GATK as -o
+	  -L/--L      bed file, passed to GATK as -L
+	  -R/--R      reference genome file
+	  -nt/--nt    value passed to GATK as -nt
+	  -log/--log  log file, passed to GATK as -log
+
+	The input is symlinked to gatk_file.bam in the current working directory
+	and both commands operate on that link (presumably so the index is
+	written beside the link rather than beside the original - confirm).
+
+	On failure the traceback is written to stderr and the process exits with
+	status 1.
+	"""
+	try:
+		descr = "GATKRealignerTargetCreator.py: version 1.0. kick a GATK Realigner Target Creator command. \n"
+		parser = argparse.ArgumentParser(description=descr)
+		parser.add_argument( '-I', '--I', required=True, help='input file' )
+		parser.add_argument( '-O', '--O', required=True, help='output file' )
+		parser.add_argument( '-L', '--L', required=True, help='bed file' )
+		parser.add_argument( '-R', '--R', required=True, help='reference genome file' )
+		parser.add_argument( '-nt', '--nt', required=True, help='process count' )
+		parser.add_argument( '-log', '--log', required=True, help='log' )
+		args = parser.parse_args()
+
+		target_dir = os.getcwd()
+		gatk_filename = os.path.join( target_dir, "gatk_file.bam" )
+		os.symlink( args.I, gatk_filename )
+
+		# Argument lists instead of shell strings: paths containing spaces or
+		# shell metacharacters reach the tools unchanged.
+		cmd1 = [ "samtools", "index", gatk_filename ]
+		print( "The cmd is %s" % " ".join( cmd1 ) )
+		_run( cmd1 )
+
+		cmd2 = [ "java", "-Xmx2g", "-jar", "GenomeAnalysisTK.jar",
+			"-T", "RealignerTargetCreator", "-R", args.R, "-nt", args.nt,
+			"-I", gatk_filename, "-L", args.L,
+			"-log", args.log, "-o", args.O ]
+		print( "The cmd is %s" % " ".join( cmd2 ) )
+		_run( cmd2 )
+	except Exception:
+		# Fail loudly: printing to stdout and returning 0 would make a failed
+		# run look successful to the caller.
+		sys.stderr.write( traceback.format_exc() )
+		sys.exit( 1 )
+
+if __name__=="__main__": __main__()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GATKRealignerTargetCreator.xml	Wed Apr 15 18:41:05 2015 +0900
@@ -0,0 +1,33 @@
+<tool id="GATK_realigner_target_creator" name="GATK Realigner Target Creator" version="1.6.13"> <!-- Galaxy tool wrapper: passes a BAM input, a bed entry, a reference genome and a thread count to GATKRealignerTargetCreator.py, which writes an interval output and a log. -->
+	<description></description>
+	<command interpreter="python">
+		GATKRealignerTargetCreator.py
+			--I $input
+			--O $output
+			--L $db_beds
+			--R $db_refgenomes
+			--nt $nt
+			--log $log
+	</command>
+	<requirements>
+	        <requirement type="package" version="1.6.13">gatk</requirement>
+	        <requirement type="package" version="0.1.19">samtools</requirement> <!-- the script runs "samtools index" on the input before calling GATK -->
+	</requirements>
+	<inputs>
+		<param type="data" name="input" format="bam" label="Input file" />
+		<param type="integer" name="nt" value="0" label="processes num [-nt]" help="" /> <!-- NOTE(review): the default 0 is forwarded verbatim as "-nt 0"; confirm GATK accepts it -->
+		<param type="select" name="db_beds" label="bed file [-L]"> <!-- choices are read from the "gatk_beds" data table -->
+			<options from_data_table="gatk_beds" />
+		</param>
+		<param type="select" name="db_refgenomes" label="reference genome [-R]"> <!-- choices are read from the "gatk_refgenomes" data table -->
+			<options from_data_table="gatk_refgenomes" />
+		</param>
+	</inputs>
+	<outputs>
+		<data format="gatk_interval" name="output" />
+		<data format="txt" name="log" />
+	</outputs>
+	<help>
+	</help> <!-- TODO: help text is empty -->
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GATKSelectVariants.xml	Wed Apr 15 18:41:05 2015 +0900
@@ -0,0 +1,32 @@
+<tool id="GATK_select_variants" name="GATK Select Variants" version="1.6.13"> <!-- Galaxy tool wrapper: passes a VCF input, a reference genome, a thread count and a select type (SNP or INDEL) to GATKSelectVariants.py; declares a VCF output and a log. -->
+	<description></description>
+	<requirements>
+	        <requirement type="package" version="1.6.13">gatk</requirement>
+	</requirements>
+	<command interpreter="python">
+		GATKSelectVariants.py
+			--I $input
+			--O $output
+			--R $db_refgenomes
+			--nt $nt
+			--st $select_type
+			--log $log
+	</command> <!-- NOTE(review): GATKSelectVariants.py is not among the files added by this changeset; confirm it is provided elsewhere -->
+	<inputs>
+		<param type="data" name="input" format="vcf" label="Input file" />
+		<param type="select" name="db_refgenomes" label="Reference genome [-R]"> <!-- choices are read from the "gatk_refgenomes" data table -->
+			<options from_data_table="gatk_refgenomes" />
+		</param>
+                <param type="select" name="select_type" label="select type [-selectType]">
+                        <option value="SNP">SNP</option>
+                        <option value="INDEL">INDEL</option>
+                </param>
+		<param type="integer" name="nt" value="0" label="processes num [-nt]" help="" /> <!-- NOTE(review): default is 0; confirm the script and GATK accept it -->
+	</inputs>
+	<outputs>
+		<data format="vcf" name="output" />
+		<data format="txt" name="log" />
+	</outputs>
+	<help>
+	</help> <!-- TODO: help text is empty -->
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GATKTableRecalibration.xml	Wed Apr 15 18:41:05 2015 +0900
@@ -0,0 +1,27 @@
+<tool id="GATK_table_recalibration" name="GATK Table Recalibration" version="1.6.13"> <!-- Galaxy tool wrapper: passes a BAM input, a reference genome and a recal csv file to GATKTableRecalibration.py; declares a BAM output and a log. -->
+	<description></description>
+	<requirements>
+	        <requirement type="package" version="1.6.13">gatk</requirement> <!-- NOTE(review): if the script indexes the BAM with samtools like the other BAM wrappers here, a samtools requirement is also needed - confirm -->
+	</requirements>
+	<command interpreter="python">
+		GATKTableRecalibration.py
+			--I $input
+			--O $output
+			--R $db_refgenomes
+			--C $recal_csv
+			--log $log
+	</command> <!-- NOTE(review): GATKTableRecalibration.py is not among the files added by this changeset; confirm it is provided elsewhere -->
+	<inputs>
+		<param type="data" name="input" format="bam" label="Input file" />
+		<param type="select" name="db_refgenomes" label="Reference genome [-R]"> <!-- choices are read from the "gatk_refgenomes" data table -->
+			<options from_data_table="gatk_refgenomes" />
+		</param>
+		<param type="data" name="recal_csv" format="csv" label="Recal file [-recalFile]" /> <!-- same format (csv) as the output declared by the GATK Count Covariate wrapper -->
+	</inputs>
+	<outputs>
+		<data format="bam" name="output" />
+		<data format="txt" name="log" />
+	</outputs>
+	<help>
+	</help> <!-- TODO: help text is empty -->
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GATKUnifiedGenotyper.xml	Wed Apr 15 18:41:05 2015 +0900
@@ -0,0 +1,39 @@
+<tool id="GATK_unified_genotyper" name="GATK Unified Genotyper" version="1.6.13"> <!-- Galaxy tool wrapper: passes a BAM input, reference genome, bed entry, thread count, two confidence thresholds and a dbSNP entry to GATKUnifiedGenotyper.py; declares a VCF output and a log. -->
+	<description></description>
+	<requirements>
+	        <requirement type="package" version="1.6.13">gatk</requirement> <!-- NOTE(review): if the script indexes the BAM with samtools like the other BAM wrappers here, a samtools requirement is also needed - confirm -->
+	</requirements>
+	<command interpreter="python">
+		GATKUnifiedGenotyper.py
+			--I $input
+			--O $output
+			--R $db_refgenomes
+			--L $db_beds
+			--nt $nt
+			--stc $stc
+			--sec $sec
+			--log $log
+			--D $db_snps
+	</command> <!-- NOTE(review): GATKUnifiedGenotyper.py is not among the files added by this changeset; confirm it is provided elsewhere -->
+	<inputs>
+		<param type="data" name="input" format="bam" label="Input file" />
+		<param type="select" name="db_refgenomes" label="Reference genome [-R]"> <!-- choices are read from the "gatk_refgenomes" data table -->
+			<options from_data_table="gatk_refgenomes" />
+		</param>
+		<param type="select" name="db_beds" label="Bed [-L]"> <!-- choices are read from the "gatk_beds" data table -->
+			<options from_data_table="gatk_beds" />
+		</param>
+		<param type="integer" name="nt" value="0" label="processes num [-nt]" /> <!-- NOTE(review): default is 0; confirm the script and GATK accept it -->
+		<param type="float" name="stc" value="50.0" label="stand call conf [-stand_call_conf]" />
+		<param type="float" name="sec" value="10.0" label="stand emit conf [-stand_emit_conf]" />
+		<param type="select" name="db_snps" label="dbSNP vcf file [-D]"> <!-- choices are read from the "gatk_snps" data table -->
+			<options from_data_table="gatk_snps" />
+		</param>
+	</inputs>
+	<outputs>
+		<data format="vcf" name="output" />
+		<data format="txt" name="log" />
+	</outputs>
+	<help>
+	</help> <!-- TODO: help text is empty -->
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Wed Apr 15 18:41:05 2015 +0900
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- Declares the gatk and samtools packages named by the requirement tags of the tool wrappers in this changeset. -->
+  <package name="gatk" version="1.6.13">
+    <repository changeset_revision="8e93fe5935fd" name="package_gatk_1_6_13" owner="pitagora" prior_installation_required="False" toolshed="http://testtoolshed.g2.bx.psu.edu" /> <!-- NOTE(review): points at the test tool shed over plain http, unlike the samtools entry below; confirm this is intended -->
+  </package>
+  <package name="samtools" version="0.1.19">
+    <repository changeset_revision="95d2c4aefb5f" name="package_samtools_0_1_19" owner="devteam" prior_installation_required="False" toolshed="https://toolshed.g2.bx.psu.edu" />
+  </package>
+</tool_dependency>