Mercurial > repos > devteam > short_reads_trim_seq
diff short_reads_trim_seq.xml @ 0:8c0b907e6e5b draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 19 May 2014 10:59:57 -0400 |
parents | |
children | ece3c79e81ae |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/short_reads_trim_seq.xml Mon May 19 10:59:57 2014 -0400
@@ -0,0 +1,97 @@
+<tool id="trim_reads" name="Select high quality segments" version="1.0.0">
+<!-- Galaxy tool definition: splits reads (FASTA) at low-quality bases using a matching quality-score dataset and writes the retained segments as FASTA. -->
+<description></description>
+
+<!-- Arguments are positional: trim, length, output1, input1, input2, then the technology-specific input3 chosen in the conditional below. -->
+<command interpreter="python">
+    short_reads_trim_seq.py $trim $length $output1 $input1 $input2 $sequencing_method_choice.input3
+</command>
+<inputs>
+<page>
+    <param name="input1" type="data" format="fasta" label="Reads" />
+    <param name="input2" type="data" format="qualsolexa,qual454" label="Quality scores" />
+    <param name="trim" type="integer" size="5" value="20" label="Minimal quality score" help="bases scoring below this value will trigger splitting"/>
+    <param name="length" type="integer" size="5" value="100" label="Minimal length of contiguous segment" help="report all high quality segments above this length. Setting this option to '0' will cause the program to return a single longest run of high quality bases per read" />
+    <!-- input3 is declared in both branches: a yes/no select for 454/SOLiD, an integer read-length cap for Solexa. -->
+    <conditional name="sequencing_method_choice">
+        <param name="sequencer" type="select" label="Select technology">
+            <option value="454">Roche (454) or ABI SOLiD</option>
+            <option value="Solexa">Illumina (Solexa)</option>
+        </param>
+        <when value="454">
+            <param name="input3" type="select" label="Low quality bases in homopolymers" help="if set to 'DO NOT trigger splitting' the program will not count low quality bases that are within or adjacent to homonucleotide runs. This will significantly reduce fragmentation of 454 data">
+                <option value="yes">DO NOT trigger splitting </option>
+                <option value="no">trigger splitting</option>
+            </param>
+        </when>
+        <when value="Solexa">
+            <param name="input3" type="integer" size="5" value="0" label="Restrict length of each read to" help="('0' = do not trim) The quality of Solexa reads drops towards the end. This option allows selecting the specified number of nucleotides from the beginning and then running the tool." />
+        </when>
+    </conditional>
+</page>
+</inputs>
+
+<outputs>
+    <data name="output1" format="fasta" />
+</outputs>
+
+<tests>
+    <!-- One test per technology branch; both use length=0, i.e. the single longest high-quality run per read. -->
+    <test>
+        <param name="sequencer" value="454" />
+        <param name="input1" value="454.fasta" ftype="fasta" />
+        <param name="input2" value="454.qual" ftype="qual454" />
+        <param name="input3" value="no" />
+        <param name="trim" value="20" />
+        <param name="length" value="0" />
+        <output name="output1" file="short_reads_trim_seq_out1.fasta" />
+    </test>
+    <test>
+        <param name="sequencer" value="Solexa" />
+        <param name="input1" value="solexa.fasta" ftype="fasta" />
+        <param name="input2" value="solexa.qual" ftype="qualsolexa" />
+        <param name="input3" value="0" />
+        <param name="trim" value="20" />
+        <param name="length" value="0" />
+        <output name="output1" file="short_reads_trim_seq_out2.fasta" />
+    </test>
+</tests>
+
+<help>
+
+.. class:: warningmark
+
+To use this tool, your dataset needs to be in the *Quality Score* format. Click the pencil icon next to your dataset to set the datatype to *Quality Score* (see below for examples).
+
+-----
+
+**What it does**
+
+This tool finds high quality segments within sequencing reads generated by Roche (454), Illumina (Solexa), or ABI SOLiD machines.
+
+-----
+
+**Example**
+
+
+Suppose this is your sequencing read::
+
+   5'---------*-------------*------**----3'
+
+where **dashes** (-) are HIGH quality bases (above 20) and **asterisks** (*) are LOW quality bases (below 20). If the **Minimal length of contiguous segment** is set to **5** (of course, only for the purposes of this example), the tool will return::
+
+   5'---------
+     -------------
+     -------
+
+you can see that the tool simply splits the read on low quality bases and then returns all segments longer than 5. **Note**, that the output of this tool will likely contain a higher number of shorter sequences compared to the original input. If we set the **Minimal length of contiguous segment** to **0**, the tool will only return the single longest segment::
+
+     -------------
+
+
+
+
+
+
+</help>
+</tool>