changeset 0:e3b753e2ee24 draft default tip

planemo upload for repository https://github.com/morinlab/tools-morinlab/tree/master/tools/delly commit 4ef2d91b7c1686a2696b92fe538d4aec51d05e40-dirty
author morinlab
date Tue, 11 Oct 2016 14:20:05 -0400
parents
children
files citations.xml delly.xml tool-data/all_fasta.loc.sample tool_data_table_conf.xml.sample tool_dependencies.xml
diffstat 5 files changed, 335 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/citations.xml	Tue Oct 11 14:20:05 2016 -0400
@@ -0,0 +1,153 @@
+<macros>
+    <!-- Suite-level citation: the unpublished 2016 manuscript describing these Galaxy tools. -->
+    <xml name="morinlab_citation">
+        <citation type="bibtex">
+            @unpublished{albuquerque2016galaxy,
+                author = "Marco Albuquerque and Bruno Grande and Elie Ritch and Martin Krzywinski and Prasath Pararajalingam and Selin Jessa and Paul Boutros and Sohrab Shah and Ryan Morin",
+                title  = "A Suite of Galaxy Tools for Cancer Mutational Analysis",
+                note   = "Unpublished Manuscript",
+                year   = "2016"
+            }
+        </citation>
+    </xml>
+    <!-- Citation macro for VarDict: Lai et al., Nucleic Acids Research 44(11), 2016. Every field but the last must end with a comma. -->
+    <xml name="vardict_citation">
+        <citation type="bibtex">
+            @article{Lai20062016,
+                title = {VarDict: a novel and versatile variant caller for next-generation sequencing in cancer research},
+                author = {Lai, Zhongwu and Markovets, Aleksandra and Ahdesmaki, Miika and Chapman, Brad and Hofmann, Oliver and McEwen, Robert and Johnson, Justin and Dougherty, Brian and Barrett, J. Carl and Dry, Jonathan R.},
+                journal = {Nucleic Acids Research},
+                volume = {44},
+                number = {11},
+                pages = {e108},
+                year = {2016},
+                doi = {10.1093/nar/gkw227}
+            }
+        </citation>
+    </xml>
+    <!-- Citation macro for SomaticSniper: Larson et al., Bioinformatics 28(3), 2012. The type attribute tells Galaxy the body is BibTeX. -->
+    <xml name="somatic_sniper_citation">
+        <citation type="bibtex">
+            @article{Larson01022012,
+                author = {Larson, David E. and Harris, Christopher C. and Chen, Ken and Koboldt, Daniel C. and Abbott, Travis E. and Dooling, David J. and Ley, Timothy J. and Mardis, Elaine R. and Wilson, Richard K. and Ding, Li},
+                title = {SomaticSniper: identification of somatic point mutations in whole genome sequencing data},
+                volume = {28},
+                number = {3},
+                pages = {311-317},
+                year = {2012},
+                doi = {10.1093/bioinformatics/btr665},
+                journal = {Bioinformatics}
+            }
+        </citation>
+    </xml>
+    <!-- Citation macro for the Galaxy platform paper: Goecks et al., Genome Biol. 11(8), 2010. The type attribute tells Galaxy the body is BibTeX. -->
+    <xml name="galaxy_citation">
+        <citation type="bibtex">
+          @ARTICLE{Goecks2010-ra,
+          title    = "Galaxy: a comprehensive approach for supporting accessible,
+                      reproducible, and transparent computational research in the life
+                      sciences",
+          author   = "Goecks, Jeremy and Nekrutenko, Anton and Taylor, James and
+                      {Galaxy Team}",
+          journal  = "Genome Biol.",
+          volume   =  11, number = 8,
+          pages    = "R86",
+          month    =  "25~" # aug,
+          year     =  2010
+          }
+        </citation>
+    </xml>
+
+    <!-- Citation macro for DELLY: Rausch et al., Bioinformatics 28(18), 2012. The type attribute tells Galaxy the body is BibTeX. -->
+    <xml name="delly_citation">
+        <citation type="bibtex">
+@ARTICLE{Rausch2012-yi,
+  title    = "{DELLY}: structural variant discovery by integrated paired-end
+              and split-read analysis",
+  author   = "Rausch, Tobias and Zichner, Thomas and Schlattl, Andreas and
+              St{\"{u}}tz, Adrian M and Benes, Vladimir and Korbel, Jan O",
+  journal  = "Bioinformatics",
+  volume   =  28, number = 18,
+  pages    = "i333--i339",
+  month    =  "15~" # sep,
+  year     =  2012
+}
+        </citation>
+    </xml>
+    <!-- Citation macro for mutationseq: Ding et al., Bioinformatics 28(2), 2012. -->
+    <xml name="mutationseq_citation">
+        <citation type="bibtex">
+@article{Ding2012-jq,
+  title    = "Feature-based classifiers for somatic mutation detection in
+              tumour-normal paired sequencing data",
+  author   = "Ding, Jiarui and Bashashati, Ali and Roth, Andrew and Oloumi,
+              Arusha and Tse, Kane and Zeng, Thomas and Haffari, Gholamreza and
+              Hirst, Martin and Marra, Marco A and Condon, Anne and Aparicio,
+              Samuel and Shah, Sohrab P",
+  journal  = "Bioinformatics",
+  volume   = 28, number = 2,
+  pages    = "167--175",
+  month    = "15~" # jan,
+  year     = 2012
+}
+        </citation>
+    </xml>
+
+    <!-- Citation macro for Strelka: Saunders et al., Bioinformatics 28(14), 2012. -->
+    <xml name="strelka_citation">
+        <citation type="bibtex">
+          @article{Saunders2012-nh,
+          title    = "Strelka: accurate somatic small-variant calling from sequenced
+                      tumor-normal sample pairs",
+          author   = "Saunders, Christopher T and Wong, Wendy S W and Swamy, Sajani and
+                      Becq, Jennifer and Murray, Lisa J and Cheetham, R Keira",
+          journal  = "Bioinformatics",
+          volume   = 28, number = 14,
+          pages    = "1811--1817",
+          month    = "15~" # jul,
+          year     = 2012
+          }
+        </citation>
+    </xml>
+    <!-- Citation macro for RADIA: Radenbaugh et al., PLoS One 9(11), 2014. -->
+    <xml name="radia_citation">
+        <citation type="bibtex">
+            @article{Radenbaugh2014-tj,
+            title     = {RADIA: RNA and DNA integrated analysis for somatic
+            mutation detection},
+            author    = {Radenbaugh, Amie J and Ma, Singer and Ewing, Adam and Stuart,
+            Joshua M and Collisson, Eric A and Zhu, Jingchun and Haussler,
+            David},
+            journal   = {PLoS One},
+            volume    = {9},
+            number    = {11},
+            pages     = {e111516},
+            year      = {2014},
+            publisher = {PLoS}
+            }
+        </citation>
+    </xml>
+    <!-- Citation macro for TITAN: Ha et al., Genome Res. 24(11), 2014. -->
+    <xml name="titan_citation">
+        <citation type="bibtex">
+@article{Ha2014-pu,
+  title       = "{TITAN}: inference of copy number architectures in clonal cell
+                 populations from tumor whole-genome sequence data",
+  author      = "Ha, Gavin and Roth, Andrew and Khattra, Jaswinder and Ho,
+                 Julie and Yap, Damian and Prentice, Leah M and Melnyk,
+                 Nataliya and McPherson, Andrew and Bashashati, Ali and Laks,
+                 Emma and Biele, Justina and Ding, Jiarui and Le, Alan and
+                 Rosner, Jamie and Shumansky, Karey and Marra, Marco A and
+                 Gilks, C Blake and Huntsman, David G and McAlpine, Jessica N
+                 and Aparicio, Samuel and Shah, Sohrab P",
+  journal     = "Genome Res.",
+  publisher   = "Cold Spring Harbor Lab",
+  volume      = 24, number = 11,
+  pages       = "1881--1893",
+  month       = "1~" # nov,
+  year        = 2014,
+  keywords    = "computational method"
+}
+        </citation>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/delly.xml	Tue Oct 11 14:20:05 2016 -0400
@@ -0,0 +1,149 @@
+<tool id="delly" name="DELLY" version="0.6.1">
+
+        <description>structural variant discovery by integrated paired-end and split-read analysis</description>
+	<!-- Import the shared citation macros; they are expanded in the citations section of this tool. -->
+	<macros>
+		<import>citations.xml</import>
+	</macros>
+
+	<requirements> <!-- Packages and environment variables this tool asks Galaxy to provide; each variable name must be the bare name, with no quote characters -->
+		<requirement type="package" version="1.55.0">boost</requirement>
+		<requirement type="package" version="0.6.1">delly</requirement>
+		<requirement type="set_environment">DELLY_DIR</requirement>
+		<requirement type="set_environment">BOOST_ROOT_DIR</requirement>
+		<requirement type="set_environment">LD_LIBRARY_PATH</requirement>
+		<requirement type="set_environment">LIBRARY_PATH</requirement>
+		<requirement type="set_environment">CPLUS_INCLUDE_PATH</requirement>
+		<requirement type="set_environment">C_INCLUDE_PATH</requirement>
+	</requirements>
+
+	<command detect_errors="aggressive">
+		<!-- Generates one shell script: symlink the inputs, run delly once per selected variant type, then merge the per-type VCFs into the output dataset. Dataset paths are single-quoted so unusual characters cannot split shell words. -->
+		<!-- BAM and BAI linking, (1) link BAM to new BAM file & (2) link BAM metadata to new BAI file -->
+		#for $i, $s in enumerate( $repeatBam )
+			ln -s '$s.sortedBam' ./input$(i).bam;
+			ln -s '$s.sortedBam.metadata.bam_index' ./input$(i).bam.bai;
+		#end for
+
+		<!-- Sets args to a list of the selected types, wrapping a single selection in a list -->
+		#if not isinstance( $variant_source_selector_param.value, list ):
+			#set $args = [ $variant_source_selector_param.value ]
+		#else:
+			#set $args = $variant_source_selector_param.value
+		#end if
+
+		<!-- Run one Delly job for each type selected; each job writes its own output.TYPE.vcf -->
+		#for $option in $args
+			\$DELLY_DIR/src/delly
+			  -t $option
+			  -o ./output.$(option).vcf
+			  -q $advancedsettings.mapQual
+			  -s $advancedsettings.madCutoff
+			<!-- The -m flag is only passed for deletion calls -->
+			#if $option == "DEL":
+			  -m $advancedsettings.minFlank
+			#end if
+
+			-u $advancedsettings.genoQual
+
+			#if $advancedsettings.vcfgeno
+			  -v '$advancedsettings.vcfgeno'
+			#end if
+
+			#if $reference_source.reference_source_selector_param == "cached":
+				-g '${reference_source.reference.fields.path}'
+			#else:
+				-g '$reference_source.reference'
+			#end if
+
+			<!-- Add each linked input BAM to the command -->
+			#for $i, $s in enumerate( $repeatBam ):
+				./input$(i).bam
+			#end for
+			;
+		#end for
+		<!-- NOTE(review): the commands are joined with ';', so a failed delly run does not stop the later merge steps; confirm that detect_errors reports such failures. -->
+		<!-- Combine VCF Files and Sort Lexicographically: the header is taken from the first selected type only -->
+		#set $option = $args[0]
+		grep ^\# output.$(option).vcf > '$outfile';
+		grep ^\# -v output.$(option).vcf > variants.txt;
+
+		<!-- If we called more than a single variant type, concatenate the variant output of all the other types -->
+		#if isinstance( $variant_source_selector_param.value, list ):
+			#for $option in $args[1:]
+				grep ^\# -v output.$(option).vcf >> variants.txt;
+			#end for
+		#end if
+
+		<!-- Sort all variant output, assuming that it will sort lexicographically by chromosome, then position, ID  -->
+		<!-- In future, maybe develop a script to sort by bam header -->
+		sort -k1,1d -k2,2n -k3,3d variants.txt > sortedVariants.txt;
+
+		<!-- Optionally keep only the variants whose seventh column is PASS, then append the records after the header -->
+		#if $filterCalls
+			awk '{if ($7 == "PASS") print $0;}' sortedVariants.txt >> '$outfile';
+		#else
+			cat sortedVariants.txt >> '$outfile';
+		#end if
+
+	</command>
+ 
+	<inputs>
+		<!-- Parameter names here are referenced from the command template of this tool; renaming one requires updating it there too -->
+		<!-- REFERENCE OPTIONS: either a genome listed in the all_fasta data table or a FASTA dataset from the history -->
+		<conditional name="reference_source">
+			<param type="select" name="reference_source_selector_param" label="Choose the source for the reference genome">
+				<option value="cached" selected="true">Use a built-in genome</option>
+				<option value="history">Use a genome from the history</option>
+			</param>
+			<when value="cached">
+				<param type="select" name="reference" label="Genome">
+					<options from_data_table="all_fasta"/>
+				</param>
+			</when>
+			<when value="history">
+				<param type="data" format="fasta" name="reference" label="Genome"/>
+			</when>
+		</conditional>
+
+		<!-- VARIANT OPTIONS: a multiple select is optional by default, so at least one type is required explicitly -->
+		<param type="select" multiple="true" optional="false" name="variant_source_selector_param" label="Select variants to identify in samples">
+			<option value="DEL" selected="true">Deletions</option>
+			<option value="DUP">Duplications</option>
+			<option value="INV">Inversions</option>
+			<option value="TRA">Translocations</option>
+		</param>
+		<!--
+		<param name="interval_file" type="data" format="txt" optional="true" label="Interval file" help="Created by make parallel, only use when parallelism is turned on, note interchromosomal and intrachromosomal events have different interval files"/>
+		-->
+		<repeat name="repeatBam" title="Bam Alignment" min="1" default="1">
+			<param format="bam" name="sortedBam" type="data" label="File"/>
+		</repeat>
+		<!-- <param name="excludeFile" type="data" format="bed" optional="true" label="Chromosomes to Exclude"/> -->
+		<param name="filterCalls" type="boolean" checked="false" label="Filter Poor Variant Calls"/>
+
+		<section name="advancedsettings" title="Advanced Settings" expanded="false">
+			<!-- Paired End Options -->
+			<param name="mapQual" type="integer" value="0" min="0" max="255" label="PE - Minimum Mapping Quality"/>
+			<param name="madCutoff" type="integer" value="9" min="0" max="255" label="PE - Insert Size Cutoff"/>
+
+			<!-- SR Options -->
+			<param name="minFlank" type="integer" value="13" label="SR - Minimum Flanking Sequence"/>
+
+			<!-- Genotyping Options -->
+			<param format="vcf" name="vcfgeno" type="data" optional="true" label="GT - Input VCF"/>
+			<param name="genoQual" type="integer" value="20" min="0" max="255" label="GT - Minimum Mapping Quality"/>
+		</section>
+	</inputs>
+  
+	<outputs> <!-- Single VCF dataset; the command template writes the header and merged records to it -->
+		<data name="outfile" format="vcf"/>
+	</outputs>
+	
+	<citations> <!-- Each expand pulls in a macro of that name from the imported citations.xml -->
+		<expand macro="morinlab_citation"/>
+		<expand macro="galaxy_citation"/>
+		<expand macro="delly_citation"/>
+	</citations>
+  
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Tue Oct 11 14:20:05 2016 -0400
@@ -0,0 +1,17 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>      <dbkey>         <display_name>  <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3        apiMel3 Honeybee (Apis mellifera): apiMel3              /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon      hg19            Human (Homo sapiens): hg19 Canonical            /path/to/genome/hg19/hg19canon.fa
+#hg19full       hg19            Human (Homo sapiens): hg19 Full                 /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Oct 11 14:20:05 2016 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- all_fasta: one row per FASTA file, loaded from tool-data/all_fasta.loc and used by the built-in genome selector -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc"/>
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Oct 11 14:20:05 2016 -0400
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="boost" version="1.55.0">
+        <repository name="package_boost_1_55_0" owner="morinlab" changeset_revision="56042def4337" toolshed="https://testtoolshed.g2.bx.psu.edu"/>
+    </package>
+    <package name="delly" version="0.6.1">
+        <repository name="package_delly_0_6_1" owner="morinlab" changeset_revision="562dac74796a" toolshed="https://testtoolshed.g2.bx.psu.edu"/>
+    </package>
+</tool_dependency>