view bcftools_view.xml @ 0:667b2d503ba3 draft default tip

Uploaded
author takadonet
date Wed, 08 Apr 2015 12:09:16 -0400
parents
children
line wrap: on
line source

<tool id="bcftools_view" name="bcftools_view" version="0.1.0">
    <requirements>
        <!-- External packages the command template relies on. bcftools performs
             the indexing and viewing; tabix is presumably listed because it
             ships the bgzip executable used to compress plain VCF input
             (TODO confirm against the package definition). -->
        <requirement version="1.0" type="package">bcftools</requirement>
        <requirement version="1.0" type="package">tabix</requirement>
    </requirements>
    <stdio>
        <!-- Treat every non-zero exit status (1 and above) as a job failure. -->
        <exit_code level="fatal" range="1:" />
    </stdio>
    <command>
        ## Builds the shell line: stage the input, index it, then run
        ## "bcftools view" with the options selected in the form.
        ##
        ## The input is staged in the job working directory so that the index
        ## file is created there and not beside the original dataset.
        #if str($input.ext) == 'vcf':
            ## Plain-text VCF has to be bgzip-compressed before it can be indexed.
            cp "$input" input.vcf &amp;&amp; bgzip input.vcf &amp;&amp;
            #set $input_file = "input.vcf.gz"
        #else
            ln -s "$input" input.staged &amp;&amp;
            #set $input_file = "input.staged"
        #end if

        bcftools index $input_file &amp;&amp;
        bcftools view

        ## Output type; the four form values map one-to-one onto -O letters.
        #if str($output_format) == 'vcf_uncompressed':
            -O v
        #elif str($output_format) == 'vcf_compressed':
            -O z
        #elif str($output_format) == 'bcf_uncompressed':
            -O u
        #elif str($output_format) == 'bcf_compressed':
            -O b
        #end if

        ## "all" (the default) adds no header flag.
        #if str($header_option) == 'header_only':
            --header-only
        #elif str($header_option) == 'no_header':
            --no-header
        #end if

        -o "$output"

        #if str($region):
            -r "$region"
        #end if

        #if str($trim_alt_alleles) == "True":
            -a
        #end if

        ## Any value other than "True" or "False" (for example "off" or an
        ## unset optional select) leaves both flags out.
        #if str($sites_no_genotype) == "True":
            -u
        #elif str($sites_no_genotype) == "False":
            -U
        #end if

        #if $min_nref:
            --min-ac "$min_nref"
        #end if

        #if $max_nref:
            --max-ac "$max_nref"
        #end if

        #if $samples:
            -s "$samples"
        #end if

        #if $include_types:
            -v "$include_types"
        #end if

        #if $filters:
            --apply-filters "$filters"
        #end if

        ## Multi-selects are split on commas; each chosen value adds its flag.
        #if $select_sites:
            #for $choice in str($select_sites).split(',')
                #if str($choice) == "known":
                    -k
                #elif str($choice) == "novel":
                    -n
                #end if
            #end for
        #end if

        #if $private:
            #for $choice in str($private).split(',')
                #if str($choice) == "private":
                    -x
                #elif str($choice) == "exclude":
                    -X
                #end if
            #end for
        #end if

        ## The input file goes last, after all options.
        $input_file
    </command>
    <inputs>
        <!-- Form parameters. Names and option values are read by the command
             template and by the tests; only labels and help texts are free text. -->
        <param format="bcf,vcf" label="VCF/BCF file to view" name="input" optional="false" type="data" />
        <param label="Choose the output format" name="output_format" type="select">
            <option selected="true" value="vcf_uncompressed">Uncompressed VCF</option>
            <option value="vcf_compressed">Compressed VCF</option>
            <option value="bcf_uncompressed">Uncompressed BCF</option>
            <option value="bcf_compressed">Compressed BCF</option>
        </param>
        <param label="Output everything, only the header, or no header" name="header_option" type="select">
            <option selected="true" value="all">Print All</option>
            <option value="header_only">Header only</option>
            <option value="no_header">No Header</option>
        </param>
        <param help="Accept following format: CHROM:START-END" label="Region to view" name="region" size="30" type="text" value="" />
        <param help="Trim alternate alleles not seen in the subset" label="Trim alternate alleles" name="trim_alt_alleles" optional="true" type="select">
            <option value="False">False</option>
            <option value="True">True</option>
        </param>
        <!-- "True" selects sites without a called genotype (-u), "False"
             excludes them (-U), "off" adds neither flag. -->
        <param help="Select/exclude sites without a called genotype" label="Sites without a called genotype" name="sites_no_genotype" optional="true" type="select">
            <option value="off">Turn off completely</option>
            <option value="False">False</option>
            <option value="True">True</option>
        </param>
        <!-- The value is passed to the min-ac / max-ac options as a bare
             integer, so no allele-type suffix can be supplied from the form. -->
        <param help="Minimum count of non-reference (nref) alleles" label="Minimum count for non reference" name="min_nref" optional="true" type="integer" value="" />
        <param help="Maximum count of non-reference (nref) alleles" label="Maximum count for non reference" name="max_nref" optional="true" type="integer" value="" />
        <!-- Passed to -s, which takes a sample list rather than a file. -->
        <param help="Comma-separated list of samples to include (or exclude with &quot;^&quot; prefix)" label="Samples to include or exclude" name="samples" optional="true" type="text" value="" />
        <param help="Select variant types: snps,indels,mnps,other" label="Select variant types" multiple="true" name="include_types" optional="true" type="select">
            <option value="snps">snps</option>
            <option value="indels">indels</option>
            <option value="mnps">mnps</option>
            <option value="other">other</option>
        </param>
        <param help="Comma-separated FILTER strings without spaces (e.g. &quot;PASS,.&quot;)" label="FILTER strings" name="filters" optional="true" type="text" value="" />
        <param help="Select known/novel sites only (ID is not/is '.')" label="Select known/novel sites" multiple="true" name="select_sites" optional="true" type="select">
            <option value="known">Known</option>
            <option value="novel">Novel</option>
        </param>
        <param label="Select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples" multiple="true" name="private" optional="true" type="select">
            <option value="private">Private</option>
            <option value="exclude">Exclude private</option>
        </param>
    </inputs>
    <outputs>
        <!-- Single result dataset. Its datatype follows the chosen output
             format; "bcf" applies when no rule below matches, which is the
             case for the bcf_uncompressed choice. -->
        <data name="output" format="bcf">
            <change_format>
                <when input="output_format" value="vcf_uncompressed" format="vcf" />
                <when input="output_format" value="vcf_compressed" format="vcf_bgzip" />
                <when input="output_format" value="bcf_compressed" format="bcf_bgzip" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Format conversion: compressed BCF to uncompressed VCF. -->
        <test>
            <param name="input" value="input1.bcf.gz" />
            <param name="output_format" value="vcf_uncompressed" />
            <output name="output" file="result1.vcf" ftype="vcf" lines_diff="2" />
        </test>
        <!-- Format conversion: plain VCF to compressed BCF, compared by size only. -->
        <test>
            <param name="input" value="result1.vcf" />
            <param name="output_format" value="bcf_compressed" />
            <output name="output" file="result3.bcf.gz" ftype="bcf_bgzip" compare="sim_size" delta="100" />
        </test>
        <!-- Header suppressed, compressed VCF output. -->
        <test>
            <param name="input" value="input1.bcf.gz" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_compressed" />
            <output name="output" file="result2.vcf.gz" ftype="vcf_bgzip" lines_diff="2" />
        </test>
        <!-- Region queries restricted to a single position. -->
        <test>
            <param name="input" value="merge.a.bcf" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="2:3199812-3199812" />
            <output name="output" file="tabix.2.3199812.out" ftype="vcf" />
        </test>
        <test>
            <param name="input" value="merge.a.bcf" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="1:3000151-3000151" />
            <output name="output" file="tabix.1.3000151.out" ftype="vcf" />
        </test>
        <!-- Region queries with very large end coordinates, on BCF and on
             compressed VCF inputs. -->
        <test>
            <param name="input" value="large_chrom_tbi_limit.bcf" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="chr11:1-536870912" />
            <output name="output" file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" />
        </test>
        <test>
            <param name="input" value="large_chrom_csi_limit.vcf.gz" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="chr20:1-2147483647" />
            <output name="output" file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" />
        </test>
        <test>
            <param name="input" value="large_chrom_tbi_limit.vcf.gz" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="chr11:1-536870912" />
            <output name="output" file="large_chrom_tbi_limit.20.1.536870912.out" ftype="vcf" />
        </test>
        <test>
            <param name="input" value="large_chrom_csi_limit.bcf" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="chr20:1-2147483647" />
            <output name="output" file="large_chrom_csi_limit.20.1.2147483647.out" ftype="vcf" />
        </test>
        <!-- Region given as a bare chromosome name, without coordinates. -->
        <test>
            <param name="input" value="large_chrom_csi_limit.bcf" />
            <param name="header_option" value="no_header" />
            <param name="output_format" value="vcf_uncompressed" />
            <param name="region" value="chr20" />
            <output name="output" file="large_chrom.20.1.2147483647.out" ftype="vcf" />
        </test>
        <!-- Subsetting to one sample combined with allele trimming, the
             uncalled-genotype switch, allele-count bounds and a variant type. -->
        <test>
            <param name="input" value="view.vcf.gz" />
            <param name="trim_alt_alleles" value="True" />
            <param name="sites_no_genotype" value="False" />
            <param name="samples" value="NA00002" />
            <param name="min_nref" value="1" />
            <param name="max_nref" value="1" />
            <param name="include_types" value="snps" />
            <param name="output_format" value="vcf_uncompressed" />
            <output name="output" file="view.1.out" ftype="vcf" lines_diff="2" />
        </test>
        <!-- FILTER string, known sites and private-allele exclusion within a region. -->
        <test>
            <param name="input" value="view.vcf.gz" />
            <param name="filters" value="PASS" />
            <param name="select_sites" value="known" />
            <param name="private" value="exclude" />
            <param name="samples" value="NA00003" />
            <param name="region" value="20" />
            <param name="output_format" value="vcf_uncompressed" />
            <output name="output" file="view.2.out" ftype="vcf" lines_diff="2" />
        </test>
        <!-- Private-allele selection for one sample. -->
        <test>
            <param name="input" value="view.vcf.gz" />
            <param name="private" value="private" />
            <param name="samples" value="NA00003" />
            <param name="output_format" value="vcf_uncompressed" />
            <output name="output" file="view.3.out" ftype="vcf" lines_diff="2" />
        </test>
    </tests>
    <help>
            
About:   VCF/BCF conversion, view, subset and filter VCF/BCF files.

Usage:   bcftools view [options] &lt;in.vcf.gz&gt; [region1 [...]]

**Output options:**

    -G,   --drop-genotypes              drop individual genotype information (after subsetting if -s option set)

    -h/H, --header-only/--no-header     print the header only/suppress the header in VCF output

    -l,   --compression-level [0-9]     compression level: 0 uncompressed, 1 best speed, 9 best compression [-1]

    -o,   --output-file &lt;file&gt;           output file name [stdout]

    -O,   --output-type &lt;b|u|z|v&gt;        b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]

    -r, --regions &lt;region&gt;               restrict to comma-separated list of regions

    -R, --regions-file &lt;file&gt;            restrict to regions listed in a file

    -t, --targets [^]&lt;region&gt;            similar to -r but streams rather than index-jumps. Exclude regions with "^" prefix

    -T, --targets-file [^]&lt;file&gt;         similar to -R but streams rather than index-jumps. Exclude regions with "^" prefix


**Subset options:**


    -a,   --trim-alt-alleles            trim alternate alleles not seen in the subset

    -I,   --no-update                   do not (re)calculate INFO fields for the subset (currently INFO/AC and INFO/AN)

    -s, --samples [^]&lt;list&gt;        comma separated list of samples to include (or exclude with "^" prefix)

    --force-samples           only warn about unknown subset samples

    -S, --samples-file [^]&lt;file&gt;   file of samples to include (or exclude with "^" prefix)




**Filter options:**

    -c/C, --min-ac/--max-ac &lt;int&gt;[:&lt;type&gt;]        minimum/maximum count for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]

    -f,   --apply-filters &lt;list&gt;                 require at least one of the listed FILTER strings (e.g. "PASS,.")

    -g,   --genotype [^]&lt;hom|het|miss&gt;           require one or more hom/het/missing genotype or, if prefixed with "^", exclude sites with hom/het/missing genotypes

    -i/e, --include/--exclude &lt;expr&gt;             select/exclude sites for which the expression is true (see man page for details)

    -k/n, --known/--novel                       select known/novel sites only (ID is not/is '.')

    -m/M, --min-alleles/--max-alleles &lt;int&gt;      minimum/maximum number of alleles listed in REF and ALT (e.g. -m2 -M2 for biallelic sites)

    -p/P, --phased/--exclude-phased             select/exclude sites where all samples are phased

    -q/Q, --min-af/--max-af &lt;float&gt;[:&lt;type&gt;]      minimum/maximum frequency for non-reference (nref), 1st alternate (alt1), least frequent (minor), most frequent (major) or sum of all but most frequent (nonmajor) alleles [nref]

    -u/U, --uncalled/--exclude-uncalled         select/exclude sites without a called genotype

    -v/V, --types/--exclude-types &lt;list&gt;         select/exclude comma-separated list of variant types: snps,indels,mnps,other [null]

    -x/X, --private/--exclude-private           select/exclude sites where the non-reference alleles are exclusive (private) to the subset samples
        
    </help>
    <citations>
        <!-- Publications to cite when using this tool, given as DOIs. -->
        <citation type="doi">10.1093/bioinformatics/btp352</citation>
        <citation type="doi">10.1093/bioinformatics/btr509</citation>
        <citation type="doi">10.1093/bioinformatics/btr076</citation>
    </citations>
</tool>