Mercurial > repos > devteam > fastq_manipulation
changeset 3:7ea141c4c834 draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_manipulation commit f2582539542b33240234e8ea6093e25d0aee9b6a
author | devteam |
---|---|
date | Sat, 30 Sep 2017 14:56:13 -0400 (2017-09-30) |
parents | 16d28d67ebeb |
children | 6f864b826b3e |
files | fastq_manipulation.py fastq_manipulation.xml test-data/sanger_full_range_as_rna.fastqsanger.bz2 test-data/sanger_full_range_as_rna.fastqsanger.gz tool_dependencies.xml |
diffstat | 5 files changed, 349 insertions(+), 384 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_manipulation.py Fri Dec 18 19:28:39 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -#Dan Blankenberg -import sys, os, shutil -import imp -from galaxy_utils.sequence.fastq import fastqReader, fastqWriter - -def main(): - #Read command line arguments - input_filename = sys.argv[1] - script_filename = sys.argv[2] - output_filename = sys.argv[3] - additional_files_path = sys.argv[4] - input_type = sys.argv[5] or 'sanger' - - #Save script file for debuging/verification info later - os.mkdir( additional_files_path ) - shutil.copy( script_filename, os.path.join( additional_files_path, 'debug.txt' ) ) - - fastq_manipulator = imp.load_module( 'fastq_manipulator', open( script_filename ), script_filename, ( '', 'r', imp.PY_SOURCE ) ) - - out = fastqWriter( open( output_filename, 'wb' ), format = input_type ) - - i = None - reads_manipulated = 0 - for i, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ): - new_read = fastq_manipulator.match_and_manipulate_read( fastq_read ) - if new_read: - out.write( new_read ) - if new_read != fastq_read: - reads_manipulated += 1 - out.close() - if i is None: - print "Your file contains no valid FASTQ reads." - else: - print 'Manipulated %s of %s reads (%.2f%%).' % ( reads_manipulated, i + 1, float( reads_manipulated ) / float( i + 1 ) * 100.0 ) - -if __name__ == "__main__": - main()
--- a/fastq_manipulation.xml Fri Dec 18 19:28:39 2015 -0500 +++ b/fastq_manipulation.xml Sat Sep 30 14:56:13 2017 -0400 @@ -1,198 +1,54 @@ -<tool id="fastq_manipulation" name="Manipulate FASTQ" version="1.0.2"> - <options sanitize="False" /> <!-- This tool uses a file to rely all parameter information (actually a dynamically generated python module), we can safely not sanitize any parameters --> - <requirements> - <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> - </requirements> - <description>reads on various attributes</description> - <command interpreter="python">fastq_manipulation.py $input_file $fastq_manipulation_file $output_file $output_file.files_path '${input_file.extension[len( 'fastq' ):]}'</command> - <inputs> - <!-- This tool is purposely over-engineered (e.g. Single option conditionals) to allow easy enhancement with workflow/rerun compatibility --> - <page> - <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer."/> - <!-- Match Reads --> - <repeat name="match_blocks" title="Match Reads"> - <conditional name="match_type"> - <param name="match_type_selector" type="select" label="Match Reads by"> - <option value="identifier">Name/Identifier</option> - <option value="sequence">Sequence Content</option> - <option value="quality">Quality Score Content</option> - </param> - <when value="identifier"> - <conditional name="match"> - <param name="match_selector" type="select" label="Identifier Match Type"> - <option value="regex">Regular Expression</option> - </param> - <when value="regex"> - <param type="text" name="match_by" label="Match by" value=".*" /> - </when> - </conditional> - </when> - <when value="sequence"> - <conditional name="match"> - <param name="match_selector" type="select" label="Sequence Match Type"> - <option value="regex">Regular Expression</option> - </param> - <when 
value="regex"> - <param type="text" name="match_by" label="Match by" value=".*" /> - </when> - </conditional> - </when> - <when value="quality"> - <conditional name="match"> - <param name="match_selector" type="select" label="Quality Match Type"> - <option value="regex">Regular Expression</option> - </param> - <when value="regex"> - <param type="text" name="match_by" label="Match by" value=".*" /> - </when> - </conditional> - </when> - </conditional> - </repeat> - <!-- Manipulate Matched Reads --> - <repeat name="manipulate_blocks" title="Manipulate Reads"> - <conditional name="manipulation_type"> - <param name="manipulation_type_selector" type="select" label="Manipulate Reads on"> - <option value="identifier">Name/Identifier</option> - <option value="sequence">Sequence Content</option> - <option value="quality">Quality Score Content</option> - <option value="miscellaneous">Miscellaneous Actions</option> - </param> - <when value="identifier"> - <conditional name="manipulation"> - <param name="manipulation_selector" type="select" label="Identifier Manipulation Type"> - <option value="translate">String Translate</option> - </param> - <when value="translate"> - <param name="from" type="text" label="From" value="" /> - <param name="to" type="text" label="To" value="" /> - </when> - </conditional> - </when> - <when value="sequence"> - <conditional name="manipulation"> - <param name="manipulation_selector" type="select" label="Sequence Manipulation Type"> - <option value="rev_comp">Reverse Complement</option> - <option value="rev_no_comp">Reverse, No Complement</option> - <option value="no_rev_comp">Complement, No Reverse</option> - <option value="trim">Trim</option> - <option value="dna_to_rna">DNA to RNA</option> - <option value="rna_to_dna">RNA to DNA</option> - <option value="translate">String Translate</option> - <option value="change_adapter">Change Adapter Base</option> - </param> - <when value="rev_comp"> - <!-- no extra settings --> - </when> - <when 
value="rev_no_comp"> - <!-- no extra settings --> - </when> - <when value="no_rev_comp"> - <!-- no extra settings --> - </when> - <when value="trim"> - <conditional name="offset_type"> - <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)<br>Use Percentage for variable length reads (Roche/454)"> - <option value="offsets_absolute" selected="true">Absolute Values</option> - <option value="offsets_percent">Percentage of Read Length</option> - </param> - <when value="offsets_absolute"> - <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left"> - <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/> - <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator> - </param> - <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right; use a negative value to remove everything to the right of the absolute value of the position"> - <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator> - </param> - </when> - <when value="offsets_percent"> - <param name="left_column_offset" label="Offset from 5' end" value="0" type="float"> - <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/> - </param> - <param name="right_column_offset" label="Offset from 3' end" value="0" type="float"> - <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/> - </param> - </when> - </conditional> - <param name="keep_zero_length" label="Keep reads with zero length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" selected="False"/> - </when> - <when value="dna_to_rna"> - <!-- no extra settings --> - </when> - 
<when value="rna_to_dna"> - <!-- no extra settings --> - </when> - <when value="translate"> - <param name="from" type="text" label="From" value="" /> - <param name="to" type="text" label="To" value="" /> - </when> - <when value="change_adapter"> - <param name="new_adapter" label="New Adapter" type="text" value="G" help="An empty string will remove the adapter base" /> - </when> - </conditional> - </when> - <when value="quality"> - <conditional name="manipulation"> - <param name="manipulation_selector" type="select" label="Quality Manipulation Type"> - <option value="translate">String Translate</option> - <!-- <option value="modify_each_score">Apply Transformation to each Score</option> Not enabled yet--> - </param> - <when value="translate"> - <param name="from" type="text" label="From" value="" /> - <param name="to" type="text" label="To" value="" /> - </when> - <when value="modify_each_score"> - <param name="map_score" type="text" label="Modify Score by" value="$score + 1" /> - </when> - </conditional> - </when> - <when value="miscellaneous"> - <conditional name="manipulation"> - <param name="manipulation_selector" type="select" label="Miscellaneous Manipulation Type"> - <option value="remove">Remove Read</option> - </param> - <when value="remove"> - <!-- no extra settings --> - </when> - </conditional> - </when> - </conditional> - </repeat> - </page> - </inputs> - <configfiles> - <configfile name="fastq_manipulation_file">##create an importable module +<tool id="fastq_manipulation" name="Manipulate FASTQ" version="1.1.1"> + <options sanitize="false" /> <!-- This tool uses a file to rely all parameter information (actually a dynamically generated python module), we can safely not sanitize any parameters --> + <description>reads on various attributes</description> + <requirements> + <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement> + </requirements> + <command><![CDATA[ +gx-fastq-manipulation '$input_file' '$fastq_manipulation_file' 
'$output_file' '$output_file.files_path' '${input_file.extension[len('fastq'):]}' + ]]></command> + <configfiles> + <configfile name="fastq_manipulation_file"><![CDATA[##create an importable module #import binascii +import binascii import re -import binascii -from string import maketrans +import six + +if six.PY2: + from string import maketrans +else: + maketrans = str.maketrans + + ##does read match -def match_read( fastq_read ): +def match_read(fastq_read): #for $match_block in $match_blocks: #if $match_block['match_type']['match_type_selector'] == 'identifier': - search_target = fastq_read.identifier[1:] ##don't include @ + search_target = fastq_read.identifier[1:] ##don't include @ #elif $match_block['match_type']['match_type_selector'] == 'sequence': search_target = fastq_read.sequence #elif $match_block['match_type']['match_type_selector'] == 'quality': search_target = fastq_read.quality #else: - #continue + #continue #end if - if not re.search( binascii.unhexlify( "${ binascii.hexlify( str( match_block['match_type']['match']['match_by'] ) ) }" ), search_target ): + if not re.search(binascii.unhexlify("${ binascii.hexlify(str(match_block['match_type']['match']['match_by'])) }").decode(), search_target): return False #end for return True + + ##modify matched reads -def manipulate_read( fastq_read ): +def manipulate_read(fastq_read): new_read = fastq_read.clone() #for $manipulate_block in $manipulate_blocks: #if $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'identifier': #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate': - new_read.identifier = "@%s" % new_read.identifier[1:].translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) ) + new_read.identifier = "@%s" % 
new_read.identifier[1:].translate(maketrans(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['from'])) }").decode(), binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['to'])) }").decode())) #end if #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'sequence': #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate': - new_read.sequence = new_read.sequence.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) ) + new_read.sequence = new_read.sequence.translate(maketrans(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['from'])) }").decode(), binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['to'])) }").decode())) #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_comp': new_read = new_read.reverse_complement() #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'rev_no_comp': @@ -201,8 +57,8 @@ new_read = new_read.complement() #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'trim': #if $manipulate_block['manipulation_type']['manipulation']['offset_type']['base_offset_type'] == 'offsets_percent': - left_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) ) - right_column_offset = int( round( float( ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] } ) / 100.0 * float( len( new_read ) ) ) ) + left_column_offset = 
int(round(float(${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] }) / 100.0 * float(len(new_read)))) + right_column_offset = int(round(float(${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] }) / 100.0 * float(len(new_read)))) #else left_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['left_column_offset'] } right_column_offset = ${ manipulate_block['manipulation_type']['manipulation']['offset_type']['right_column_offset'] } @@ -211,8 +67,8 @@ right_column_offset = -right_column_offset else: right_column_offset = None - new_read = new_read.slice( left_column_offset, right_column_offset ) - if not ( ${str( manipulate_block['manipulation_type']['manipulation']['keep_zero_length'] ) == 'keep_zero_length'} or len( new_read ) ): + new_read = new_read.slice(left_column_offset, right_column_offset) + if not (${str(manipulate_block['manipulation_type']['manipulation']['keep_zero_length']) == 'keep_zero_length'} or len(new_read)): return None #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'dna_to_rna': new_read = new_read.sequence_as_DNA() @@ -220,181 +76,335 @@ new_read = new_read.sequence_as_RNA() #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'change_adapter': if new_read.sequence_space == 'color': - new_read = new_read.change_adapter( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['new_adapter'] ) ) }" ) ) + new_read = new_read.change_adapter(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['new_adapter'])) }").decode()) #end if #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'quality': #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'translate': - new_read.quality = 
new_read.quality.translate( maketrans( binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['from'] ) ) }" ), binascii.unhexlify( "${ binascii.hexlify( str( manipulate_block['manipulation_type']['manipulation']['to'] ) ) }" ) ) ) + new_read.quality = new_read.quality.translate(maketrans(binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['from'])) }").decode(), binascii.unhexlify("${ binascii.hexlify(str(manipulate_block['manipulation_type']['manipulation']['to'])) }").decode())) #elif $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'map_score': - def score_method( score ): - raise Exception, "Unimplemented" ##This option is not yet available, need to abstract out e.g. column adding tool action: preventing users from using 'harmful' actions - new_read.quality_map( score_method ) + def score_method(score): + raise Exception("Unimplemented") ##This option is not yet available, need to abstract out e.g. 
column adding tool action: preventing users from using 'harmful' actions + new_read.quality_map(score_method) #end if #elif $manipulate_block['manipulation_type']['manipulation_type_selector'] == 'miscellaneous': #if $manipulate_block['manipulation_type']['manipulation']['manipulation_selector'] == 'remove': return None #end if #else: - #continue + #continue #end if #end for if new_read.description != "+": - new_read.description = "+%s" % new_read.identifier[1:] ##ensure description is still valid + new_read.description = "+%s" % new_read.identifier[1:] ##ensure description is still valid return new_read -def match_and_manipulate_read( fastq_read ): + + +def match_and_manipulate_read(fastq_read): new_read = fastq_read - if match_read( fastq_read ): - new_read = manipulate_read( fastq_read ) + if match_read(fastq_read): + new_read = manipulate_read(fastq_read) return new_read -</configfile> - </configfiles> - <outputs> - <data format="input" name="output_file" /> - </outputs> - <tests> - <!-- match all and do nothing --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value=".*" /> - <param name="manipulation_type_selector" value="identifier" /> - <param name="manipulation_selector" value="translate" /> - <param name="from" value="" /> - <param name="to" value="" /> - <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> - </test> - <!-- match None and do nothing --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value="STRINGDOESNOTEXIST" /> - <param name="manipulation_type_selector" value="identifier" /> - <param name="manipulation_selector" 
value="translate" /> - <param name="from" value="" /> - <param name="to" value="" /> - <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> - </test> - <!-- match all and remove --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value=".*" /> - <param name="manipulation_type_selector" value="miscellaneous" /> - <param name="manipulation_selector" value="remove" /> - <output name="output_file" file="empty_file.dat" /> - </test> - <!-- match None and remove --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value="STRINGDOESNOTEXIST" /> - <param name="manipulation_type_selector" value="miscellaneous" /> - <param name="manipulation_selector" value="remove" /> - <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> - </test> - <!-- match all and trim to 4 inner-most bases --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value=".*" /> - <param name="manipulation_type_selector" value="sequence" /> - <param name="manipulation_selector" value="trim" /> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="45"/> - <param name="right_column_offset" value="45"/> - <param name="keep_zero_length" value="true" /> - <output name="output_file" file="fastq_trimmer_out1.fastqsanger" /> - </test> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" 
ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value=".*" /> - <param name="manipulation_type_selector" value="sequence" /> - <param name="manipulation_selector" value="trim" /> - <param name="base_offset_type" value="offsets_percent"/> - <param name="left_column_offset" value="47.87"/> - <param name="right_column_offset" value="47.87"/> - <param name="keep_zero_length" value="true" /> - <output name="output_file" file="fastq_trimmer_out1.fastqsanger" /> - </test> - <!-- match all and rev comp --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value=".*" /> - <param name="manipulation_type_selector" value="sequence" /> - <param name="manipulation_selector" value="rev_comp" /> - <output name="output_file" file="sanger_full_range_rev_comp.fastqsanger" /> - </test> - <!-- match all and rev comp, with ambiguous DNA --> - <test> - <param name="input_file" value="misc_dna_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value=".*" /> - <param name="manipulation_type_selector" value="sequence" /> - <param name="manipulation_selector" value="rev_comp" /> - <output name="output_file" file="misc_dna_as_sanger_rev_comp_1.fastqsanger" /> - </test> - <!-- match all and rev comp, with ambiguous RNA --> - <test> - <param name="input_file" value="misc_rna_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value=".*" /> - <param name="manipulation_type_selector" value="sequence" /> - <param name="manipulation_selector" 
value="rev_comp" /> - <output name="output_file" file="misc_rna_as_sanger_rev_comp_1.fastqsanger" /> - </test> - <!-- match first seq and rev comp --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value="FAKE0001" /> - <param name="manipulation_type_selector" value="sequence" /> - <param name="manipulation_selector" value="rev_comp" /> - <output name="output_file" file="sanger_full_range_rev_comp_1_seq.fastqsanger" /> - </test> - <!-- match first seq and rev comp: i.e. undo above --> - <test> - <param name="input_file" value="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value="FAKE0001" /> - <param name="manipulation_type_selector" value="sequence" /> - <param name="manipulation_selector" value="rev_comp" /> - <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> - </test> - <!-- match all and DNA to RNA --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value=".*" /> - <param name="manipulation_type_selector" value="sequence" /> - <param name="manipulation_selector" value="dna_to_rna" /> - <output name="output_file" file="sanger_full_range_as_rna.fastqsanger" /> - </test> - <!-- match all and RNA to DNA --> - <test> - <param name="input_file" value="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" /> - <param name="match_type_selector" value="identifier" /> - <param name="match_selector" value="regex" /> - <param name="match_by" value=".*" /> - <param name="manipulation_type_selector" 
value="sequence" /> - <param name="manipulation_selector" value="rna_to_dna" /> - <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" /> - </test> - </tests> -<help> + ]]></configfile> + </configfiles> + <inputs> + <!-- This tool is purposely over-engineered (e.g. Single option conditionals) to allow easy enhancement with workflow/rerun compatibility --> + <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer"/> + <!-- Match Reads --> + <repeat name="match_blocks" title="Match Reads"> + <conditional name="match_type"> + <param name="match_type_selector" type="select" label="Match Reads by"> + <option value="identifier">Name/Identifier</option> + <option value="sequence">Sequence Content</option> + <option value="quality">Quality Score Content</option> + </param> + <when value="identifier"> + <conditional name="match"> + <param name="match_selector" type="select" label="Identifier Match Type"> + <option value="regex">Regular Expression</option> + </param> + <when value="regex"> + <param name="match_by" type="text" value=".*" label="Match by" /> + </when> + </conditional> + </when> + <when value="sequence"> + <conditional name="match"> + <param name="match_selector" type="select" label="Sequence Match Type"> + <option value="regex">Regular Expression</option> + </param> + <when value="regex"> + <param name="match_by" type="text" value=".*" label="Match by" /> + </when> + </conditional> + </when> + <when value="quality"> + <conditional name="match"> + <param name="match_selector" type="select" label="Quality Match Type"> + <option value="regex">Regular Expression</option> + </param> + <when value="regex"> + <param name="match_by" type="text" value=".*" label="Match by" /> + </when> + </conditional> + </when> + </conditional> + </repeat> + <!-- 
Manipulate Matched Reads --> + <repeat name="manipulate_blocks" title="Manipulate Reads"> + <conditional name="manipulation_type"> + <param name="manipulation_type_selector" type="select" label="Manipulate Reads on"> + <option value="identifier">Name/Identifier</option> + <option value="sequence">Sequence Content</option> + <option value="quality">Quality Score Content</option> + <option value="miscellaneous">Miscellaneous Actions</option> + </param> + <when value="identifier"> + <conditional name="manipulation"> + <param name="manipulation_selector" type="select" label="Identifier Manipulation Type"> + <option value="translate">String Translate</option> + </param> + <when value="translate"> + <param name="from" type="text" value="" label="From" /> + <param name="to" type="text" value="" label="To" /> + </when> + </conditional> + </when> + <when value="sequence"> + <conditional name="manipulation"> + <param name="manipulation_selector" type="select" label="Sequence Manipulation Type"> + <option value="rev_comp">Reverse Complement</option> + <option value="rev_no_comp">Reverse, No Complement</option> + <option value="no_rev_comp">Complement, No Reverse</option> + <option value="trim">Trim</option> + <option value="dna_to_rna">DNA to RNA</option> + <option value="rna_to_dna">RNA to DNA</option> + <option value="translate">String Translate</option> + <option value="change_adapter">Change Adapter Base</option> + </param> + <when value="rev_comp" /> + <when value="rev_no_comp" /> + <when value="no_rev_comp" /> + <when value="trim"> + <conditional name="offset_type"> + <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)<br>Use Percentage for variable length reads (Roche/454)"> + <option value="offsets_absolute" selected="true">Absolute Values</option> + <option value="offsets_percent">Percentage of Read Length</option> + </param> + <when value="offsets_absolute"> + <param 
name="left_column_offset" type="integer" min="0" value="0" label="Offset from 5' end" help="Values start at 0, increasing from the left" /> + <param name="right_column_offset" type="integer" value="0" label="Offset from 3' end" help="Values start at 0, increasing from the right; use a negative value to remove everything to the right of the absolute value of the position" /> + </when> + <when value="offsets_percent"> + <param name="left_column_offset" type="float" min="0" max="100" value="0" label="Offset from 5' end" /> + <param name="right_column_offset" type="float" min="0" max="100" value="0" label="Offset from 3' end" /> + </when> + </conditional> + <param name="keep_zero_length" type="boolean" truevalue="keep_zero_length" falsevalue="exclude_zero_length" checked="false" label="Keep reads with zero length" /> + </when> + <when value="dna_to_rna" /> + <when value="rna_to_dna" /> + <when value="translate"> + <param name="from" type="text" value="" label="From" /> + <param name="to" type="text" value="" label="To" /> + </when> + <when value="change_adapter"> + <param name="new_adapter" type="text" value="G" label="New adapter" help="An empty string will remove the adapter base" /> + </when> + </conditional> + </when> + <when value="quality"> + <conditional name="manipulation"> + <param name="manipulation_selector" type="select" label="Quality Manipulation Type"> + <option value="translate">String Translate</option> + <!-- <option value="modify_each_score">Apply Transformation to each Score</option> Not enabled yet--> + </param> + <when value="translate"> + <param name="from" type="text" value="" label="From" /> + <param name="to" type="text" value="" label="To" /> + </when> + <!-- <when value="modify_each_score"> + <param name="map_score" type="text" label="Modify Score by" value="$score + 1" /> + </when> --> + </conditional> + </when> + <when value="miscellaneous"> + <conditional name="manipulation"> + <param name="manipulation_selector" type="select" 
label="Miscellaneous Manipulation Type"> + <option value="remove">Remove Read</option> + </param> + <when value="remove" /> + </conditional> + </when> + </conditional> + </repeat> + </inputs> + <outputs> + <data name="output_file" format_source="input_file" /> + </outputs> + <tests> + <!-- match all and do nothing --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value=".*" /> + <param name="manipulation_type_selector" value="identifier" /> + <param name="manipulation_selector" value="translate" /> + <param name="from" value="" /> + <param name="to" value="" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- match None and do nothing --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value="STRINGDOESNOTEXIST" /> + <param name="manipulation_type_selector" value="identifier" /> + <param name="manipulation_selector" value="translate" /> + <param name="from" value="" /> + <param name="to" value="" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- match all and remove --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value=".*" /> + <param name="manipulation_type_selector" value="miscellaneous" /> + <param name="manipulation_selector" value="remove" /> + <output name="output_file" file="empty_file.dat" ftype="fastqsanger" /> + </test> + <!-- 
match None and remove --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value="STRINGDOESNOTEXIST" /> + <param name="manipulation_type_selector" value="miscellaneous" /> + <param name="manipulation_selector" value="remove" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- match all and trim to 4 inner-most bases --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value=".*" /> + <param name="manipulation_type_selector" value="sequence" /> + <param name="manipulation_selector" value="trim" /> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="45"/> + <param name="right_column_offset" value="45"/> + <param name="keep_zero_length" value="true" /> + <output name="output_file" file="fastq_trimmer_out1.fastqsanger" ftype="fastqsanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value=".*" /> + <param name="manipulation_type_selector" value="sequence" /> + <param name="manipulation_selector" value="trim" /> + <param name="base_offset_type" value="offsets_percent"/> + <param name="left_column_offset" value="47.87"/> + <param name="right_column_offset" value="47.87"/> + <param name="keep_zero_length" value="true" /> + <output name="output_file" file="fastq_trimmer_out1.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- match all and rev comp --> + 
<test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value=".*" /> + <param name="manipulation_type_selector" value="sequence" /> + <param name="manipulation_selector" value="rev_comp" /> + <output name="output_file" file="sanger_full_range_rev_comp.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- match all and rev comp, with ambiguous DNA --> + <test> + <param name="input_file" value="misc_dna_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value=".*" /> + <param name="manipulation_type_selector" value="sequence" /> + <param name="manipulation_selector" value="rev_comp" /> + <output name="output_file" file="misc_dna_as_sanger_rev_comp_1.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- match all and rev comp, with ambiguous RNA --> + <test> + <param name="input_file" value="misc_rna_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value=".*" /> + <param name="manipulation_type_selector" value="sequence" /> + <param name="manipulation_selector" value="rev_comp" /> + <output name="output_file" file="misc_rna_as_sanger_rev_comp_1.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- match first seq and rev comp --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value="FAKE0001" /> + <param name="manipulation_type_selector" value="sequence" /> + <param name="manipulation_selector" value="rev_comp" /> + <output 
name="output_file" file="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- match first seq and rev comp: i.e. undo above --> + <test> + <param name="input_file" value="sanger_full_range_rev_comp_1_seq.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value="FAKE0001" /> + <param name="manipulation_type_selector" value="sequence" /> + <param name="manipulation_selector" value="rev_comp" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- match all and DNA to RNA --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value=".*" /> + <param name="manipulation_type_selector" value="sequence" /> + <param name="manipulation_selector" value="dna_to_rna" /> + <output name="output_file" file="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- match all and RNA to DNA--> + <test> + <param name="input_file" value="sanger_full_range_as_rna.fastqsanger" ftype="fastqsanger" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value=".*" /> + <param name="manipulation_type_selector" value="sequence" /> + <param name="manipulation_selector" value="rna_to_dna" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- match all and RNA to DNA (gz compressed) --> + <test> + <param name="input_file" value="sanger_full_range_as_rna.fastqsanger.gz" ftype="fastqsanger.gz" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" 
value=".*" /> + <param name="manipulation_type_selector" value="sequence" /> + <param name="manipulation_selector" value="rna_to_dna" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger.gz" decompress="true" /> + </test> + <!-- match all and RNA to DNA (bz2 compressed) --> + <test> + <param name="input_file" value="sanger_full_range_as_rna.fastqsanger.bz2" ftype="fastqsanger.bz2" /> + <param name="match_type_selector" value="identifier" /> + <param name="match_selector" value="regex" /> + <param name="match_by" value=".*" /> + <param name="manipulation_type_selector" value="sequence" /> + <param name="manipulation_selector" value="rna_to_dna" /> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger.bz2" decompress="true" /> + </test> + </tests> + <help><![CDATA[ This tool allows you to build complex manipulations to be applied to each matching read in a FASTQ file. A read must match all matching directives in order for it to be manipulated; if a read does not match, it is output in a non-modified manner. All reads matching will have each of the specified manipulations performed upon them, in the order specified. Regular Expression Matches are made using re.search, see http://docs.python.org/library/re.html for more information. @@ -406,7 +416,6 @@ Only color space reads can have adapter bases substituted. - ----- **Example** @@ -415,12 +424,11 @@ Steps: -1. Click **Add new Match Reads** and leave the matching options set to the default (Matching by sequence name/identifier using the regular expression "\*."; thereby matching all reads). -2. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "Change Adapter Base" and set **New Adapter** to "" (an empty text field). +1. 
Click **Add new Match Reads** and leave the matching options set to the default (Matching by sequence name/identifier using the regular expression ".\*"; thereby matching all reads). +2. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "Change Adapter Base" and set **New Adapter** to "" (an empty text field). 3. Click **Add new Manipulate Reads**, change **Manipulate Reads on** to "Sequence Content", set **Sequence Manipulation Type** to "String Translate" and set **From** to "0123." and **To** to "ACGTN". 4. Click Execute. The new history item will contain double-encoded pseudo-nucleotide space reads. - -</help> + ]]></help> <citations> <citation type="doi">10.1093/bioinformatics/btq281</citation> </citations>
--- a/tool_dependencies.xml Fri Dec 18 19:28:39 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="galaxy_sequence_utils" version="1.0.0"> - <repository changeset_revision="6334612a010e" name="package_galaxy_utils_1_0" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>