changeset 1:6fb39843d37d draft default tip

planemo upload commit f3f0bef4a450aafab3c6b05a27647471f93b22f3
author jjohnson
date Wed, 22 Mar 2017 17:22:35 -0400
parents a30dd3c77b30
children
files samtools_split_by_chrom.xml test-data/test.bam
diffstat 2 files changed, 49 insertions(+), 18 deletions(-) [+]
line wrap: on
line diff
--- a/samtools_split_by_chrom.xml	Wed Nov 30 16:46:40 2016 -0500
+++ b/samtools_split_by_chrom.xml	Wed Mar 22 17:22:35 2017 -0400
@@ -9,39 +9,65 @@
   <expand macro="version_command"></expand>
     <command>
 <![CDATA[
+    #import re
+    #set $name = $re.sub('\.bam$','',$input_bam.name)
+    #if str($refs) != 'None':
+      #set ref_list = str($refs).split(",")
+    #else 
+      #set ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]
+    #end if
     mkdir -p outputs &&
     ln -s "${input_bam}" temp_input.bam &&
     ln -s "${input_bam.metadata.bam_index}" temp_input.bam.bai 
-    #for $ref in str( $refs ).split(","):
-        && samtools view -@ \${GALAXY_SLOTS:-1} -bh inputs/temp_input.bam ${ref} |  
-        samtools sort -O bam -T sorted -@ \${GALAXY_SLOTS:-1} -o "outputs/${input_bam.name}.${ref}.bam" - 
+    #for $i,$ref in enumerate($ref_list):
+        #set $idx = "%04d" % $i
+        && samtools view -@ \${GALAXY_SLOTS:-1} -bh temp_input.bam "${ref}" |  
+        samtools sort -O bam -T sorted -@ \${GALAXY_SLOTS:-1} -o "outputs/${idx}-${name}.${ref}.bam" - 
     #end for
+    && ls -l outputs | awk '/bam/{fname = substr(\$NF,6); printf("%s\t%d\n", fname, \$5)}' > "ls_split_files"
 ]]>
     </command>
     <inputs>
         <param name="input_bam" format="bam" label="Select BAM dataset to slice" type="data" />
-        <param name="refs" type="select" optional="False" multiple="True" label="Select references (chromosomes and contigs) you would like to restrict bam to" help="Click and type in the box above to see options. You can select multiple entries. If &quot;No options available&quot; is displayed, you need to re-detect metadata on the input dataset. See help section below.">
-            <!-- The options tagset below extracts reference names from bam file metadata -->
-            <!-- This will not work with bed files with old style metadata. However this  -->
-            <!-- Can be easily fixed by re-deceting metadata on a bam dataset by clicking -->
-            <!-- The pencil icon and settind datatype to "bam"                            -->
-            <!-- This change has been commited in the following pull request:             -->
-            <!-- https://github.com/galaxyproject/galaxy/pull/107                         -->
+        <param name="refs" type="select" optional="True" multiple="True" label="Select references (chromosomes and contigs) you would like to restrict bam to" help="Click and type in the box above to see options. You can select multiple entries. If &quot;No options available&quot; is displayed, you need to re-detect metadata on the input dataset. See help section below.">
             <options>
                 <filter type="data_meta" ref="input_bam" key="reference_names" />
             </options>
         </param>
+        <param name="show_listing" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="show listing"/>
     </inputs>
     <outputs>
-        <collection name="output_collection" type='list' label="${input_bam.name} by chrom">
-          <discover_datasets pattern="(?P&lt;designation&gt;.+)\.bam" directory="outputs" ext='bam'/>
+        <data format="tabular" name="listing" from_work_dir="ls_split_files">
+            <filter>show_listing</filter>
+            <actions>
+                <action name="column_names" type="metadata" default="name,size" />
+            </actions>
+        </data>
+        <collection name="output_collection" type="list" label="${input_bam.name} by chrom">
+          <discover_datasets pattern="\d+-(?P&lt;designation&gt;.*\.bam)" directory="outputs" format="bam" ext="bam" visible="false"/>
         </collection>
     </outputs>
     <tests>
         <test>
-            <param ftype="bam" name="input_bam" value="bam-slice-input.bam" />
-            <param name="refs" value="chrM" />
-            <output file="bam-slice-test2.bam" ftype="bam" name="output_bam" />
+            <param ftype="bam" name="input_bam" value="test.bam" />
+            <param name="show_listing" value="True" />
+            <output name="listing">
+                <assert_contents>
+                  <has_text expression="chr1" />
+                  <has_text expression="chr2" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param ftype="bam" name="input_bam" value="test.bam" />
+            <param name="refs" value="chr1,chr3" />
+            <param name="show_listing" value="True" />
+            <output name="listing">
+                <assert_contents>
+                  <has_text expression="chr1" />
+                  <not_has_text expression="chr2" />
+                </assert_contents>
+            </output>
         </test>
     </tests>
     <help>
@@ -49,12 +75,17 @@
 
 **What it does**
 
-Creates a dataset collection of BAM files, one per selected chromosome.
+Splits an input BAM dataset into a dataset collection of per-chromosome BAM files.
+
+This dataset collection can be passed to a Galaxy tool that takes a single BAM input
+in order to split the BAM processing across multiple jobs.
+
+A suggested use case:
+
+hisat -> samtools_split_by_chrom => bcftools_mpileup => bcftools_call => bcftools_merge -> snpEff
 
 This tool is based on ``samtools view`` command. 
 
-@no-chrom-options@
-
 ]]>
   </help>
     <expand macro="citations"></expand>
Binary file test-data/test.bam has changed