<!--
view fastq_trimmer.xml @ 0:feb5479a48ff draft

Imported from capsule None
author devteam
date Thu, 23 Jan 2014 12:31:36 -0500
parents
children 3be753901f6e
line wrap: on
line source
-->

<tool id="fastq_trimmer" name="FASTQ Trimmer" version="1.0.0">
  <!-- Short description of the tool. -->
  <description>by column</description>
  <!-- Package dependency: galaxy_sequence_utils, version 1.0.0. -->
  <requirements>
    <requirement version="1.0.0" type="package">galaxy_sequence_utils</requirement>
  </requirements>
  <!--
    Invokes fastq_trimmer.py with seven positional arguments, in order:
    input path, output path, left offset, right offset, offset mode
    (the selected base_offset_type value), the input's datatype extension
    with its first len('fastq') characters sliced off, and the
    keep_zero_length value.
  -->
  <command interpreter="python"><![CDATA[
    fastq_trimmer.py
      '$input_file'
      '$output_file'
      '${offset_type['left_column_offset']}'
      '${offset_type['right_column_offset']}'
      '${offset_type['base_offset_type']}'
      '${input_file.extension[len( 'fastq' ):]}'
      '$keep_zero_length'
  ]]></command>
  <inputs>
    <!-- Dataset to trim; restricted to the fastqsanger and fastqcssanger formats. -->
    <param name="input_file"
           type="data"
           format="fastqsanger,fastqcssanger"
           label="FASTQ File"/>
    <!--
      Chooses how the two trim offsets are expressed. Both branches define the
      same parameter names (left_column_offset, right_column_offset) so the
      command line can reference them regardless of the selected mode.
    -->
    <conditional name="offset_type">
      <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)&lt;br&gt;Use Percentage for variable length reads (Roche/454)">
        <option value="offsets_absolute" selected="true">Absolute Values</option>
        <option value="offsets_percent">Percentage of Read Length</option>
      </param>
      <!-- Absolute mode: integer offsets, 0 or greater. -->
      <when value="offsets_absolute">
        <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left">
          <!-- min is 0, so zero is accepted; the message states the real constraint. -->
          <validator type="in_range" message="Base Offsets must not be negative" min="0" max="inf"/>
          <!-- NOTE(review): presumably redundant with type="integer"; kept as-is, confirm before removing. -->
          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
        </param>
        <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right">
          <validator type="in_range" message="Base Offsets must not be negative" min="0" max="inf"/>
          <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator>
        </param>
      </when>
      <!-- Percentage mode: float offsets constrained to the range 0 to 100. -->
      <when value="offsets_percent">
        <param name="left_column_offset" label="Offset from 5' end" value="0" type="float">
          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
        </param>
        <param name="right_column_offset" label="Offset from 3' end" value="0" type="float">
          <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/>
        </param>
      </when>
    </conditional>
  <!--
    Flag passed verbatim to the script as its last argument: emits
    "keep_zero_length" when checked and "exclude_zero_length" otherwise.
    Boolean parameters take their default from "checked" ("selected" applies
    to select options), so the unchecked default is stated with checked="false".
  -->
  <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" checked="false"/>
  </inputs>
  <outputs>
    <!--
      The trimmed output takes its datatype from input_file. format_source
      names that input explicitly instead of relying on the legacy
      format="input" shorthand.
    -->
    <data name="output_file" format_source="input_file"/>
  </outputs>
  <tests>
    <!-- Do nothing trim: zero offsets on both ends, expected output is the input file itself. -->
    <test>
      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
      <param name="base_offset_type" value="offsets_absolute"/>
      <param name="left_column_offset" value="0"/>
      <param name="right_column_offset" value="0"/>
      <param name="keep_zero_length" value="keep_zero_length"/>
      <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger"/>
    </test>
    <!-- Trim to empty file: absolute offsets of 30 and 64 with zero-length reads excluded. -->
    <test>
      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
      <param name="base_offset_type" value="offsets_absolute"/>
      <param name="left_column_offset" value="30"/>
      <param name="right_column_offset" value="64"/>
      <param name="keep_zero_length" value="exclude_zero_length"/>
      <output name="output_file" file="empty_file.dat"/>
    </test>
    <!-- Trim to empty file: percentage offsets of 50 from each end with zero-length reads excluded. -->
    <test>
      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
      <param name="base_offset_type" value="offsets_percent"/>
      <param name="left_column_offset" value="50"/>
      <param name="right_column_offset" value="50"/>
      <param name="keep_zero_length" value="exclude_zero_length"/>
      <output name="output_file" file="empty_file.dat"/>
    </test>
    <!-- Trim to 4 inner-most bases: absolute offsets of 45 from each end. -->
    <test>
      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
      <param name="base_offset_type" value="offsets_absolute"/>
      <param name="left_column_offset" value="45"/>
      <param name="right_column_offset" value="45"/>
      <param name="keep_zero_length" value="exclude_zero_length"/>
      <output name="output_file" file="fastq_trimmer_out1.fastqsanger"/>
    </test>
    <!-- Percentage offsets of 47.87 from each end, compared against fastq_trimmer_out1.fastqsanger. -->
    <test>
      <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/>
      <param name="base_offset_type" value="offsets_percent"/>
      <param name="left_column_offset" value="47.87"/>
      <param name="right_column_offset" value="47.87"/>
      <param name="keep_zero_length" value="exclude_zero_length"/>
      <output name="output_file" file="fastq_trimmer_out1.fastqsanger"/>
    </test>
  </tests>
  <help>
This tool allows you to trim the ends of reads.

You can specify either absolute or percent-based offsets. An offset is the number of bases removed from the corresponding end of the read, so an offset of 0 leaves that end untouched. When using the percent-based method, each offset is taken as a percentage of the read length and rounded to the nearest integer.

For example, if you have a read of length 36::
  
  @Some FASTQ Sanger Read
  CAATATGTNCTCACTGATAAGTGGATATNAGCNCCA
  +
  =@@.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%&lt;;;%&lt;B@
  
And you set absolute offsets of 2 and 9::
  
  @Some FASTQ Sanger Read
  ATATGTNCTCACTGATAAGTGGATA
  +
  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-4
  
Or you set percent offsets of 6% and 20% (corresponding to absolute offsets of 2 and 7 for a read length of 36)::
  
  @Some FASTQ Sanger Read
  ATATGTNCTCACTGATAAGTGGATATN
  +
  @.@;B-%?8&gt;CBA@&gt;7@7BBCA4-48%
  
-----

.. class:: warningmark

Trimming a color space read will cause any adapter base to be lost.

------

**Citation**

If you use this tool, please cite `Blankenberg D, Gordon A, Von Kuster G, Coraor N, Taylor J, Nekrutenko A; Galaxy Team. Manipulation of FASTQ data with Galaxy. Bioinformatics. 2010 Jul 15;26(14):1783-5. &lt;http://www.ncbi.nlm.nih.gov/pubmed/20562416&gt;`_


  </help>
</tool>