comparison samtools_split_by_chrom.xml @ 1:6fb39843d37d draft default tip

planemo upload commit f3f0bef4a450aafab3c6b05a27647471f93b22f3
author jjohnson
date Wed, 22 Mar 2017 17:22:35 -0400
parents a30dd3c77b30
children
comparison
equal deleted inserted replaced
0:a30dd3c77b30 1:6fb39843d37d
7 <expand macro="requirements"></expand> 7 <expand macro="requirements"></expand>
8 <expand macro="stdio"></expand> 8 <expand macro="stdio"></expand>
9 <expand macro="version_command"></expand> 9 <expand macro="version_command"></expand>
10 <command> 10 <command>
11 <![CDATA[ 11 <![CDATA[
12 #import re
13 #set $name = $re.sub('\.bam$','',$input_bam.name)
14 #if str($refs) != 'None':
15 #set ref_list = str($refs).split(",")
16 #else
17 #set ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]
18 #end if
12 mkdir -p outputs && 19 mkdir -p outputs &&
13 ln -s "${input_bam}" temp_input.bam && 20 ln -s "${input_bam}" temp_input.bam &&
14 ln -s "${input_bam.metadata.bam_index}" temp_input.bam.bai 21 ln -s "${input_bam.metadata.bam_index}" temp_input.bam.bai
15 #for $ref in str( $refs ).split(","): 22 #for $i,$ref in enumerate($ref_list):
16 && samtools view -@ \${GALAXY_SLOTS:-1} -bh inputs/temp_input.bam ${ref} | 23 #set $idx = "%04d" % $i
17 samtools sort -O bam -T sorted -@ \${GALAXY_SLOTS:-1} -o "outputs/${input_bam.name}.${ref}.bam" - 24 && samtools view -@ \${GALAXY_SLOTS:-1} -bh temp_input.bam ${ref} |
25 samtools sort -O bam -T sorted -@ \${GALAXY_SLOTS:-1} -o "outputs/${idx}-${name}.${ref}.bam" -
18 #end for 26 #end for
27 && ls -l outputs | awk '/bam/{fname = substr(\$NF,6); printf("%s\t%d\n", fname, \$5)}' > "ls_split_files"
19 ]]> 28 ]]>
20 </command> 29 </command>
21 <inputs> 30 <inputs>
22 <param name="input_bam" format="bam" label="Select BAM dataset to slice" type="data" /> 31 <param name="input_bam" format="bam" label="Select BAM dataset to slice" type="data" />
23 <param name="refs" type="select" optional="False" multiple="True" label="Select references (chromosomes and contigs) you would like to restrict bam to" help="Click and type in the box above to see options. You can select multiple entries. If &quot;No options available&quot; is displayed, you need to re-detect metadata on the input dataset. See help section below."> 32 <param name="refs" type="select" optional="True" multiple="True" label="Select references (chromosomes and contigs) you would like to restrict bam to" help="Click and type in the box above to see options. You can select multiple entries. If &quot;No options available&quot; is displayed, you need to re-detect metadata on the input dataset. See help section below.">
24 <!-- The options tagset below extracts reference names from bam file metadata -->
25 <!-- This will not work with bed files with old style metadata. However this -->
26 <!-- Can be easily fixed by re-detecting metadata on a bam dataset by clicking -->
27 <!-- The pencil icon and setting datatype to "bam" -->
28 <!-- This change has been committed in the following pull request: -->
29 <!-- https://github.com/galaxyproject/galaxy/pull/107 -->
30 <options> 33 <options>
31 <filter type="data_meta" ref="input_bam" key="reference_names" /> 34 <filter type="data_meta" ref="input_bam" key="reference_names" />
32 </options> 35 </options>
33 </param> 36 </param>
37 <param name="show_listing" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="show listing"/>
34 </inputs> 38 </inputs>
35 <outputs> 39 <outputs>
36 <collection name="output_collection" type='list' label="${input_bam.name} by chrom"> 40 <data format="tabular" name="listing" from_work_dir="ls_split_files">
37 <discover_datasets pattern="(?P&lt;designation&gt;.+)\.bam" directory="outputs" ext='bam'/> 41 <filter>show_listing</filter>
42 <actions>
43 <action name="column_names" type="metadata" default="name,size" />
44 </actions>
45 </data>
46 <collection name="output_collection" type="list" label="${input_bam.name} by chrom">
47 <discover_datasets pattern="\d+-(?P&lt;designation&gt;.*\.bam)" directory="outputs" format="bam" ext="bam" visible="false"/>
38 </collection> 48 </collection>
39 </outputs> 49 </outputs>
40 <tests> 50 <tests>
41 <test> 51 <test>
42 <param ftype="bam" name="input_bam" value="bam-slice-input.bam" /> 52 <param ftype="bam" name="input_bam" value="test.bam" />
43 <param name="refs" value="chrM" /> 53 <param name="show_listing" value="True" />
44 <output file="bam-slice-test2.bam" ftype="bam" name="output_bam" /> 54 <output file="listing">
55 <assert_contents>
56 <has_text expression="chr1" />
57 <has_text expression="chr2" />
58 </assert_contents>
59 </output>
60 </test>
61 <test>
62 <param ftype="bam" name="input_bam" value="test.bam" />
63 <param name="refs" value="chr1,chr3" />
64 <param name="show_listing" value="True" />
65 <output file="listing">
66 <assert_contents>
67 <has_text expression="chr1" />
68 <not_has_text expression="chr2" />
69 </assert_contents>
70 </output>
45 </test> 71 </test>
46 </tests> 72 </tests>
47 <help> 73 <help>
48 <![CDATA[ 74 <![CDATA[
49 75
50 **What it does** 76 **What it does**
51 77
52 Creates a dataset collection of BAM files, one per selected chromosome. 78 Splits an input BAM dataset into a dataset collection of individual chromosome BAM files.
79
80 This dataset collection can be passed to a Galaxy tool that takes a single BAM input
81 in order to split the BAM processing across multiple jobs.
82
83 A suggested use case:
84
85 hisat -> samtools_split_by_chrom => bcftools_mpileup => bcftools_call => bcftools_merge -> snpEff
53 86
54 This tool is based on the ``samtools view`` command. 87 This tool is based on the ``samtools view`` command.
55
56 @no-chrom-options@
57 88
58 ]]> 89 ]]>
59 </help> 90 </help>
60 <expand macro="citations"></expand> 91 <expand macro="citations"></expand>
61 </tool> 92 </tool>