Mercurial > repos > jjohnson > samtools_split_by_chrom
view samtools_split_by_chrom.xml @ 1:6fb39843d37d draft default tip
planemo upload commit f3f0bef4a450aafab3c6b05a27647471f93b22f3
author | jjohnson |
---|---|
date | Wed, 22 Mar 2017 17:22:35 -0400 |
parents | a30dd3c77b30 |
children |
line wrap: on
line source
<tool id="samtools_split_by_chrom" name="Split BAM by Chromosome" version="2.0">
    <!--
        Galaxy tool wrapper: splits one BAM dataset into a list collection that
        holds one coordinate-sorted BAM file per reference sequence, and can
        optionally emit a tabular listing (name, size) of the files produced.
        Requirements, stdio handling, the version command and citations are
        expanded from macros.xml.
    -->
    <description>into collection</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- <code file="samtools_slice_options.py"/> -->
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command>
<![CDATA[
        ## Base name used for every output file: the dataset name without a trailing ".bam".
        #import re
        #set $name = $re.sub('\.bam$','',$input_bam.name)
        ## Use the references picked by the user; when none are picked, fall back to every
        ## reference recorded in the BAM metadata. Names wrapped in __sq__ markers are unwrapped.
        #if str($refs) != 'None':
            #set ref_list = str($refs).split(",")
        #else
            #set ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]
        #end if
        ## Link the BAM and its index side by side under fixed names so that
        ## samtools can find the index next to the input.
        mkdir -p outputs
        && ln -s "${input_bam}" temp_input.bam
        && ln -s "${input_bam.metadata.bam_index}" temp_input.bam.bai
        ## One view + sort pipeline per reference. The zero-padded index prefix keeps the
        ## output files in reference order; the reference name is single-quoted so that
        ## shell metacharacters in contig names reach samtools unchanged.
        #for $i,$ref in enumerate($ref_list):
            #set $idx = "%04d" % $i
            && samtools view -@ \${GALAXY_SLOTS:-1} -bh temp_input.bam '${ref}' | samtools sort -O bam -T sorted -@ \${GALAXY_SLOTS:-1} -o "outputs/${idx}-${name}.${ref}.bam" -
        #end for
        ## Build the listing: substr(..., 6) drops the five-character "NNNN-" prefix from the
        ## file name, and the fifth column of "ls -l" supplies the size.
        && ls -l outputs | awk '/bam/{fname = substr(\$NF,6); printf("%s\t%d\n", fname, \$5)}' > "ls_split_files"
]]>
    </command>
    <inputs>
        <param name="input_bam" format="bam" label="Select BAM dataset to slice" type="data" />
        <!-- Options are filled from the reference_names metadata of the selected BAM. -->
        <param name="refs" type="select" optional="true" multiple="true"
               label="Select references (chromosomes and contigs) you would like to restrict bam to"
               help="Click and type in the box above to see options. You can select multiple entries. If &quot;No options available&quot; is displayed, you need to re-detect metadata on the input dataset. See help section below.">
            <options>
                <filter type="data_meta" ref="input_bam" key="reference_names" />
            </options>
        </param>
        <param name="show_listing" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="show listing"/>
    </inputs>
    <outputs>
        <!-- Optional name/size table; only created when "show listing" is checked. -->
        <data format="tabular" name="listing" from_work_dir="ls_split_files">
            <filter>show_listing</filter>
            <actions>
                <action name="column_names" type="metadata" default="name,size" />
            </actions>
        </data>
        <collection name="output_collection" type="list" label="${input_bam.name} by chrom">
            <!-- The numeric prefix is matched but excluded from the element designation. -->
            <discover_datasets pattern="\d+-(?P&lt;designation&gt;.*\.bam)" directory="outputs" format="bam" ext="bam" visible="false"/>
        </collection>
    </outputs>
    <tests>
        <!-- No reference selection: every reference in the BAM is expected in the listing. -->
        <test>
            <param ftype="bam" name="input_bam" value="test.bam" />
            <param name="show_listing" value="True" />
            <output name="listing">
                <assert_contents>
                    <has_text text="chr1" />
                    <has_text text="chr2" />
                </assert_contents>
            </output>
        </test>
        <!-- Explicit selection: references that were not selected must not be listed. -->
        <test>
            <param ftype="bam" name="input_bam" value="test.bam" />
            <param name="refs" value="chr1,chr3" />
            <param name="show_listing" value="True" />
            <output name="listing">
                <assert_contents>
                    <has_text text="chr1" />
                    <not_has_text text="chr2" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
<![CDATA[
**What it does**

Splits an input BAM dataset to a dataset collection of individual chromosome bam files.
This dataset collection can be passed to a galaxy tool that takes a single bam input in order to split the bam processing across multiple jobs.

A suggested use case:

hisat -> samtools_split_by_chrom => bcftools_mpileup => bcftools_call => bcftools_merge -> snpEff

This tool is based on ``samtools view`` command.
]]>
    </help>
    <expand macro="citations"/>
</tool>