view samtools_split_by_chrom.xml @ 1:6fb39843d37d draft default tip

planemo upload commit f3f0bef4a450aafab3c6b05a27647471f93b22f3
author jjohnson
date Wed, 22 Mar 2017 17:22:35 -0400
parents a30dd3c77b30
children
line wrap: on
line source

<tool id="samtools_split_by_chrom" name="Split BAM by Chromosome" version="2.0">
  <description>into collection</description>
  <!-- Shared definitions are pulled in from macros.xml (not visible in this file). -->
  <macros>
    <import>macros.xml</import>
  </macros>
  <!-- <code file="samtools_slice_options.py"/> -->
  <!-- The three macros below are presumably defined in macros.xml; confirm there
       what requirements, stdio rules and version command they expand to. -->
  <expand macro="requirements"/>
  <expand macro="stdio"/>
  <expand macro="version_command"/>
    <!-- Cheetah template that renders a single shell command line:
         link the input BAM and its index into the working directory, write one
         sorted BAM per reference into outputs/, then write a name/size listing
         of outputs/ to ls_split_files. -->
    <command>
<![CDATA[
    #import re
    ## Base name for the split files: the dataset name without a trailing ".bam".
    #set $name = $re.sub('\.bam$','',$input_bam.name)
    ## The dataset name is user-controlled and ends up inside a shell path, so every
    ## character outside [A-Za-z0-9_.-] is replaced by "_". This keeps "/", quotes,
    ## "$", backticks and whitespace out of the command and keeps the awk listing
    ## below (which splits on whitespace) correct.
    #set $name = $re.sub('[^A-Za-z0-9_.-]','_',$name)
    ## Use the selected references; when nothing is selected fall back to every
    ## reference name recorded in the BAM metadata (stripping __sq__ quote markers).
    #if str($refs) != 'None':
      #set $ref_list = str($refs).split(",")
    #else
      #set $ref_list = [$re.sub('^.*__sq__(.+)__sq__.*$','\\1',n) if n.find('__sq__') >= 0 else n for n in str($input_bam.metadata.reference_names).split(',')]
    #end if
    mkdir -p outputs &&
    ln -s "${input_bam}" temp_input.bam &&
    ln -s "${input_bam.metadata.bam_index}" temp_input.bam.bai
    #for $i,$ref in enumerate($ref_list):
        ## Zero-padded position in the list; prefixes the file name and is removed
        ## again by the discover_datasets pattern and by the awk substr() below.
        #set $idx = "%04d" % $i
        ## File-name-safe form of the reference name (same character policy as the
        ## dataset name). The unmodified name is still what samtools receives.
        #set $safe_ref = $re.sub('[^A-Za-z0-9_.-]','_',$ref)
        && samtools view -@ \${GALAXY_SLOTS:-1} -bh temp_input.bam '${ref}' |
        samtools sort -O bam -T sorted -@ \${GALAXY_SLOTS:-1} -o "outputs/${idx}-${name}.${safe_ref}.bam" -
    #end for
    ## Listing columns: file name with the 5-character "NNNN-" prefix removed, and
    ## the size column (field 5) of "ls -l".
    && ls -l outputs | awk '/bam/{fname = substr(\$NF,6); printf("%s\t%d\n", fname, \$5)}' > "ls_split_files"
]]>
    </command>
    <inputs>
        <!-- BAM dataset to split. The command template also reads its bam_index
             and reference_names metadata. -->
        <param name="input_bam"
               type="data"
               format="bam"
               label="Select BAM dataset to slice" />
        <!-- Optional multi-select populated from the reference_names metadata of
             input_bam. When nothing is selected the command template falls back
             to every reference name in that metadata. -->
        <param name="refs"
               type="select"
               multiple="True"
               optional="True"
               label="Select references (chromosomes and contigs) you would like to restrict bam to"
               help="Click and type in the box above to see options. You can select multiple entries. If &quot;No options available&quot; is displayed, you need to re-detect metadata on the input dataset. See help section below.">
            <options>
                <filter type="data_meta" ref="input_bam" key="reference_names" />
            </options>
        </param>
        <!-- Controls whether the "listing" output is produced (see its filter). -->
        <param name="show_listing"
               type="boolean"
               checked="false"
               truevalue="yes"
               falsevalue="no"
               label="show listing" />
    </inputs>
    <outputs>
        <!-- Two-column table (name, size) taken from the ls_split_files file that
             the command writes; only produced when show_listing is set. -->
        <data name="listing" format="tabular" from_work_dir="ls_split_files">
            <filter>show_listing</filter>
            <actions>
                <action type="metadata" name="column_names" default="name,size" />
            </actions>
        </data>
        <!-- One BAM per reference, discovered in outputs/. The leading digits and
             dash of each file name are dropped; the remainder (ending in .bam)
             becomes the element designation. -->
        <collection name="output_collection" type="list" label="${input_bam.name} by chrom">
            <discover_datasets directory="outputs"
                               pattern="\d+-(?P&lt;designation&gt;.*\.bam)"
                               format="bam"
                               ext="bam"
                               visible="false" />
        </collection>
    </outputs>
    <tests>
        <!-- No reference selection: the listing should mention both chr1 and chr2
             (assumes test.bam contains both; confirm against the test data). -->
        <test>
            <param ftype="bam" name="input_bam" value="test.bam" />
            <param name="show_listing" value="True" />
            <!-- "name" ties this check to the <data name="listing"> output; a
                 "file" attribute would instead name an expected file in test-data. -->
            <output name="listing">
                <assert_contents>
                  <!-- has_text / not_has_text take a literal "text" attribute;
                       "expression" belongs to has_text_matching. -->
                  <has_text text="chr1" />
                  <has_text text="chr2" />
                </assert_contents>
            </output>
        </test>
        <!-- Restricting to chr1 and chr3: chr2 must no longer appear in the listing. -->
        <test>
            <param ftype="bam" name="input_bam" value="test.bam" />
            <param name="refs" value="chr1,chr3" />
            <param name="show_listing" value="True" />
            <output name="listing">
                <assert_contents>
                  <has_text text="chr1" />
                  <not_has_text text="chr2" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
<![CDATA[

**What it does**

Splits an input BAM dataset to a dataset collection of individual chromosome bam files. 

This dataset collection can be passed to a galaxy tool that takes a single bam input 
in order to split the bam processing across multiple jobs.   

A suggested use case:

hisat -> samtools_split_by_chrom => bcftools_mpileup => bcftools_call => bcftools_merge -> snpEff

This tool is based on ``samtools view`` command. 

]]>
  </help>
    <expand macro="citations"></expand>
</tool>