Mercurial > repos > devteam > fastq_filter
changeset 3:ac4a365726a1 draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tool_collections/galaxy_sequence_utils/fastq_filter commit f2582539542b33240234e8ea6093e25d0aee9b6a
author | devteam |
---|---|
date | Sat, 30 Sep 2017 14:55:35 -0400 |
parents | 1fe9cfa960c5 |
children | 283a55c8bcb7 |
files | fastq_filter.py fastq_filter.xml test-data/sanger_full_range_original_sanger.fastqsanger.gz tool_dependencies.xml |
diffstat | 4 files changed, 281 insertions(+), 324 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_filter.py Fri Dec 18 19:28:08 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -#Dan Blankenberg -import sys, os, shutil -from galaxy_utils.sequence.fastq import fastqReader, fastqWriter - -def main(): - #Read command line arguments - input_filename = sys.argv[1] - script_filename = sys.argv[2] - output_filename = sys.argv[3] - additional_files_path = sys.argv[4] - input_type = sys.argv[5] or 'sanger' - - #Save script file for debuging/verification info later - os.mkdir( additional_files_path ) - shutil.copy( script_filename, os.path.join( additional_files_path, 'debug.txt' ) ) - - ## Dan, Others: Can we simply drop the "format=input_type" here since it is specified in reader. - ## This optimization would cut runtime roughly in half (for my test case anyway). -John - out = fastqWriter( open( output_filename, 'wb' ), format = input_type ) - - i = None - reads_kept = 0 - execfile(script_filename, globals()) - for i, fastq_read in enumerate( fastqReader( open( input_filename ), format = input_type ) ): - ret_val = fastq_read_pass_filter( fastq_read ) ## fastq_read_pass_filter defined in script_filename - if ret_val: - out.write( fastq_read ) - reads_kept += 1 - out.close() - if i is None: - print "Your file contains no valid fastq reads." - else: - print 'Kept %s of %s reads (%.2f%%).' % ( reads_kept, i + 1, float( reads_kept ) / float( i + 1 ) * 100.0 ) - -if __name__ == "__main__": - main()
--- a/fastq_filter.xml Fri Dec 18 19:28:08 2015 -0500 +++ b/fastq_filter.xml Sat Sep 30 14:55:35 2017 -0400 @@ -1,294 +1,299 @@ -<tool id="fastq_filter" name="Filter FASTQ" version="1.0.0"> - <description>reads by quality score and length</description> - <requirements> - <requirement type="package" version="1.0.0">galaxy_sequence_utils</requirement> - </requirements> - <command interpreter="python">fastq_filter.py $input_file $fastq_filter_file $output_file $output_file.files_path '${input_file.extension[len( 'fastq' ):]}'</command> - <inputs> - <page> - <param name="input_file" type="data" format="fastqsanger,fastqcssanger" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer."/> - <param name="min_size" label="Minimum Size" value="0" type="integer"> - <validator type="in_range" message="Minimum size must be positive" min="0"/> - </param> - <param name="max_size" label="Maximum Size" value="0" type="integer" help="A maximum size less than 1 indicates no limit."/> - <param name="min_quality" label="Minimum Quality" value="0" type="float"/> - <param name="max_quality" label="Maximum Quality" value="0" type="float" help="A maximum quality less than 1 indicates no limit."/> - <param name="max_num_deviants" label="Maximum number of bases allowed outside of quality range" value="0" type="integer"> - <validator type="in_range" message="Maximum number of deviate bases must be positive" min="0"/> - </param> - <param name="paired_end" label="This is paired end data" type="boolean" truevalue="paired_end" falsevalue="single_end" checked="False"/> - <repeat name="fastq_filters" title="Quality Filter on a Range of Bases" help="The above settings do not apply to these filters."> - <conditional name="offset_type"> - <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)<br>Use Percentage for variable length reads (Roche/454)"> - <option value="offsets_absolute" selected="true">Absolute Values</option> - <option value="offsets_percent">Percentage of Read Length</option> - </param> - <when value="offsets_absolute"> - <param name="left_column_offset" label="Offset from 5' end" value="0" type="integer" help="Values start at 0, increasing from the left"> - <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/> - <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator> - </param> - <param name="right_column_offset" label="Offset from 3' end" value="0" type="integer" help="Values start at 0, increasing from the right"> - <validator type="in_range" message="Base Offsets must be positive" min="0" max="inf"/> - <validator type="expression" message="An integer is required.">int( float( value ) ) == float( value )</validator> - </param> - </when> - <when value="offsets_percent"> - <param name="left_column_offset" label="Offset from 5' end" value="0" type="float"> - <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/> - </param> - <param name="right_column_offset" label="Offset from 3' end" value="0" type="float"> - <validator type="in_range" message="Base Offsets must be between 0 and 100" min="0" max="100"/> - </param> - </when> - </conditional> - <param name="score_operation" type="select" label="Aggregate read score for specified range"> - <option value="min" selected="True">min score</option> - <option value="max">max score</option> - <option value="sum">sum of scores</option> - <option value="mean">mean of scores</option> - </param> - <param name="score_comparison" type="select" label="Keep read when aggregate score is"> - <option value=">">></option> - <option value=">=" selected="true">>=</option> - <option value="==">==</option> - <option value="<"><</option> - <option value="<="><=</option> - <sanitizer sanitize="False"/> - </param> - <param name="score" label="Quality Score" value="0" type="float" /> - </repeat> - </page> - </inputs> - <configfiles> - <configfile name="fastq_filter_file"> -def fastq_read_pass_filter( fastq_read ): - def mean( score_list ): - return float( sum( score_list ) ) / float( len( score_list ) ) - if len( fastq_read ) < $min_size: +<tool id="fastq_filter" name="Filter FASTQ" version="1.1.1"> + <description>reads by quality score and length</description> + <requirements> + <requirement type="package" version="1.1.1">galaxy_sequence_utils</requirement> + </requirements> + <command><![CDATA[ +gx-fastq-filter '$input_file' '$fastq_filter_file' '$output_file' '$output_file.files_path' '${input_file.extension[len( 'fastq' ):]}' + ]]></command> + <configfiles> + <configfile name="fastq_filter_file"><![CDATA[ +def fastq_read_pass_filter(fastq_read): + + def mean(score_list): + return float(sum(score_list)) / float(len(score_list)) + + if len(fastq_read) < $min_size: return False - if $max_size > 0 and len( fastq_read ) > $max_size: + if $max_size > 0 and len(fastq_read) > $max_size: return False num_deviates = $max_num_deviants qual_scores = fastq_read.get_decimal_quality_scores() for qual_score in qual_scores: - if qual_score < $min_quality or ( $max_quality > 0 and qual_score > $max_quality ): + if qual_score < $min_quality or ($max_quality > 0 and qual_score > $max_quality): if num_deviates == 0: return False else: num_deviates -= 1 #if not $paired_end: - qual_scores_split = [ qual_scores ] + qual_scores_split = [qual_scores] #else: - qual_scores_split = [ qual_scores[ 0:int( len( qual_scores ) / 2 ) ], qual_scores[ int( len( qual_scores ) / 2 ): ] ] + qual_scores_split = [qual_scores[0:int(len(qual_scores) / 2)], qual_scores[int(len(qual_scores) / 2): ]] #end if #for $fastq_filter in $fastq_filters: for split_scores in qual_scores_split: - left_column_offset = $fastq_filter[ 'offset_type' ][ 'left_column_offset' ] - right_column_offset = $fastq_filter[ 'offset_type' ][ 'right_column_offset' ] -#if $fastq_filter[ 'offset_type' ]['base_offset_type'] == 'offsets_percent': - left_column_offset = int( round( float( left_column_offset ) / 100.0 * float( len( split_scores ) ) ) ) - right_column_offset = int( round( float( right_column_offset ) / 100.0 * float( len( split_scores ) ) ) ) + left_column_offset = $fastq_filter['offset_type']['left_column_offset'] + right_column_offset = $fastq_filter['offset_type']['right_column_offset'] +#if $fastq_filter['offset_type']['base_offset_type'] == 'offsets_percent': + left_column_offset = int(round(float(left_column_offset) / 100.0 * float(len(split_scores)))) + right_column_offset = int(round(float(right_column_offset) / 100.0 * float(len(split_scores)))) #end if if right_column_offset > 0: - split_scores = split_scores[ left_column_offset:-right_column_offset] + split_scores = split_scores[left_column_offset:-right_column_offset] else: - split_scores = split_scores[ left_column_offset:] - if split_scores: ##if a read doesn't have enough columns, it passes by default - if not ( ${fastq_filter[ 'score_operation' ]}( split_scores ) $fastq_filter[ 'score_comparison' ] $fastq_filter[ 'score' ] ): + split_scores = split_scores[left_column_offset:] + if split_scores: ##if a read doesn't have enough columns, it passes by default + if not (${fastq_filter['score_operation']}(split_scores) $fastq_filter['score_comparison'] $fastq_filter['score']): return False #end for return True -</configfile> - </configfiles> - <outputs> - <data format="input" name="output_file" /> - </outputs> - <tests> - <!-- Do nothing filter --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> - <param name="min_size" value="0"/> - <param name="max_size" value="0"/> - <param name="min_quality" value="0"/> - <param name="max_quality" value="0"/> - <param name="max_num_deviants" value="0"/> - <param name="paired_end" value="False"/> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="0"/> - <param name="right_column_offset" value="0"/> - <param name="score_operation" value="min"/> - <param name="score_comparison" value=">="/> - <param name="score" value="0"/> - <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/> - </test> - <!-- crippled input types prevent this test <test> - <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/> - <param name="min_size" value="0"/> - <param name="max_size" value="0"/> - <param name="min_quality" value="-5"/> - <param name="max_quality" value="0"/> - <param name="max_num_deviants" value="0"/> - <param name="paired_end" value="False"/> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="0"/> - <param name="right_column_offset" value="0"/> - <param name="score_operation" value="min"/> - <param name="score_comparison" value=">="/> - <param name="score" value="-5"/> - <output name="out_file1" file="solexa_full_range_original_solexa.fastqsolexa"/> - </test> --> - <!-- No trim, so does not remove Adapter from cssanger --> - <test> - <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger"/> - <param name="min_size" value="0"/> - <param name="max_size" value="0"/> - <param name="min_quality" value="0"/> - <param name="max_quality" value="0"/> - <param name="max_num_deviants" value="0"/> - <param name="paired_end" value="False"/> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="0"/> - <param name="right_column_offset" value="0"/> - <param name="score_operation" value="min"/> - <param name="score_comparison" value=">="/> - <param name="score" value="0"/> - <output name="out_file1" file="sanger_full_range_as_cssanger.fastqcssanger"/> - </test> - <!-- Remove all Filter --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> - <param name="min_size" value="0"/> - <param name="max_size" value="0"/> - <param name="min_quality" value="1"/> - <param name="max_quality" value="0"/> - <param name="max_num_deviants" value="0"/> - <param name="paired_end" value="False"/> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="0"/> - <param name="right_column_offset" value="0"/> - <param name="score_operation" value="min"/> - <param name="score_comparison" value=">="/> - <param name="score" value="0"/> - <output name="out_file1" file="empty_file.dat"/> - </test> - <!-- crippled input types prevent this test <test> - <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/> - <param name="min_size" value="0"/> - <param name="max_size" value="0"/> - <param name="min_quality" value="-4"/> - <param name="max_quality" value="0"/> - <param name="max_num_deviants" value="0"/> - <param name="paired_end" value="False"/> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="0"/> - <param name="right_column_offset" value="0"/> - <param name="score_operation" value="min"/> - <param name="score_comparison" value=">="/> - <param name="score" value="-5"/> - <output name="out_file1" file="empty_file.dat"/> - </test> --> - <!-- Keep all by allowing 1 deviant --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> - <param name="min_size" value="0"/> - <param name="max_size" value="0"/> - <param name="min_quality" value="1"/> - <param name="max_quality" value="0"/> - <param name="max_num_deviants" value="1"/> - <param name="paired_end" value="False"/> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="0"/> - <param name="right_column_offset" value="0"/> - <param name="score_operation" value="min"/> - <param name="score_comparison" value=">="/> - <param name="score" value="0"/> - <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/> - </test> - <!-- crippled input types prevent this test<test> - <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/> - <param name="min_size" value="0"/> - <param name="max_size" value="0"/> - <param name="min_quality" value="-5"/> - <param name="max_quality" value="61"/> - <param name="max_num_deviants" value="1"/> - <param name="paired_end" value="False"/> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="0"/> - <param name="right_column_offset" value="0"/> - <param name="score_operation" value="min"/> - <param name="score_comparison" value=">="/> - <param name="score" value="-5"/> - <output name="out_file1" file="solexa_full_range_original_solexa.fastqsolexa"/> - </test> --> - <!-- Filter inner range --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> - <param name="min_size" value="0"/> - <param name="max_size" value="0"/> - <param name="min_quality" value="0"/> - <param name="max_quality" value="0"/> - <param name="max_num_deviants" value="0"/> - <param name="paired_end" value="False"/> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="1"/> - <param name="right_column_offset" value="1"/> - <param name="score_operation" value="min"/> - <param name="score_comparison" value=">="/> - <param name="score" value="1"/> - <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/> - </test> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> - <param name="min_size" value="0"/> - <param name="max_size" value="0"/> - <param name="min_quality" value="0"/> - <param name="max_quality" value="0"/> - <param name="max_num_deviants" value="0"/> - <param name="paired_end" value="False"/> - <param name="base_offset_type" value="offsets_absolute"/> - <param name="left_column_offset" value="1"/> - <param name="right_column_offset" value="1"/> - <param name="score_operation" value="max"/> - <param name="score_comparison" value="<="/> - <param name="score" value="92"/> - <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/> - </test> - <!-- percent based offsets --> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> - <param name="min_size" value="0"/> - <param name="max_size" value="0"/> - <param name="min_quality" value="0"/> - <param name="max_quality" value="0"/> - <param name="max_num_deviants" value="0"/> - <param name="paired_end" value="False"/> - <param name="base_offset_type" value="offsets_percent"/> - <param name="left_column_offset" value="1.075"/> - <param name="right_column_offset" value="1.075"/> - <param name="score_operation" value="min"/> - <param name="score_comparison" value=">="/> - <param name="score" value="1"/> - <output name="out_file1" file="sanger_full_range_original_sanger.fastqsanger"/> - </test> - <test> - <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> - <param name="min_size" value="0"/> - <param name="max_size" value="0"/> - <param name="min_quality" value="0"/> - <param name="max_quality" value="0"/> - <param name="max_num_deviants" value="0"/> - <param name="paired_end" value="True"/> - <param name="base_offset_type" value="offsets_percent"/> - <param name="left_column_offset" value="1"/> - <param name="right_column_offset" value="1"/> - <param name="score_operation" value="min"/> - <param name="score_comparison" value=">="/> - <param name="score" value="1"/> - <output name="out_file1" file="empty_file.dat"/> - </test> - </tests> -<help> + ]]></configfile> + </configfiles> + <inputs> + <param name="input_file" type="data" format="fastqsanger,fastqcssanger,fastqsanger.gz,fastqcssanger.gz,fastqsanger.bz2,fastqcssanger.bz2" label="FASTQ File" help="Requires groomed data: if your data does not appear here try using the FASTQ groomer"/> + <param name="min_size" type="integer" min="0" value="0" label="Minimum size" /> + <param name="max_size" type="integer" value="0" label="Maximum size" help="A maximum size less than 1 indicates no limit"/> + <param name="min_quality" type="float" value="0" label="Minimum quality"/> + <param name="max_quality" type="float" value="0" label="Maximum quality" help="A maximum quality less than 1 indicates no limit"/> + <param name="max_num_deviants" type="integer" min="0" value="0" label="Maximum number of bases allowed outside of quality range" /> + <param name="paired_end" type="boolean" truevalue="paired_end" falsevalue="single_end" checked="false" label="This is paired end data" /> + <repeat name="fastq_filters" title="Quality Filter on a Range of Bases" help="The above settings do not apply to these filters"> + <conditional name="offset_type"> + <param name="base_offset_type" type="select" label="Define Base Offsets as" help="Use Absolute for fixed length reads (Illumina, SOLiD)<br>Use Percentage for variable length reads (Roche/454)"> + <option value="offsets_absolute" selected="true">Absolute Values</option> + <option value="offsets_percent">Percentage of Read Length</option> + </param> + <when value="offsets_absolute"> + <param name="left_column_offset" type="integer" min="0" value="0" label="Offset from 5' end" help="Values start at 0, increasing from the left" /> + <param name="right_column_offset" type="integer" min="0" value="0" label="Offset from 3' end" help="Values start at 0, increasing from the right" /> + </when> + <when value="offsets_percent"> + <param name="left_column_offset" type="float" min="0" max="100" value="0" label="Offset from 5' end" /> + <param name="right_column_offset" type="float" min="0" max="100" value="0" label="Offset from 3' end" /> + </when> + </conditional> + <param name="score_operation" type="select" label="Aggregate read score for specified range"> + <option value="min" selected="true">min score</option> + <option value="max">max score</option> + <option value="sum">sum of scores</option> + <option value="mean">mean of scores</option> + </param> + <param name="score_comparison" type="select" label="Keep read when aggregate score is"> + <option value=">">></option> + <option value=">=" selected="true">>=</option> + <option value="==">==</option> + <option value="<"><</option> + <option value="<="><=</option> + <sanitizer sanitize="false"/> + </param> + <param name="score" type="float" value="0" label="Quality score" /> + </repeat> + </inputs> + <outputs> + <data name="output_file" format_source="input_file" /> + </outputs> + <tests> + <!-- Do nothing filter --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="0"/> + <param name="max_quality" value="0"/> + <param name="max_num_deviants" value="0"/> + <param name="paired_end" value="false"/> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="0"/> + <param name="right_column_offset" value="0"/> + <param name="score_operation" value="min"/> + <param name="score_comparison" value=">="/> + <param name="score" value="0"/> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- Do nothing filter compressed --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger.gz" ftype="fastqsanger.gz"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="0"/> + <param name="max_quality" value="0"/> + <param name="max_num_deviants" value="0"/> + <param name="paired_end" value="false"/> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="0"/> + <param name="right_column_offset" value="0"/> + <param name="score_operation" value="min"/> + <param name="score_comparison" value=">="/> + <param name="score" value="0"/> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger.gz" ftype="fastqsanger.gz" decompress="true"/> + </test> + <!-- crippled input types prevent this test <test> + <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="-5"/> + <param name="max_quality" value="0"/> + <param name="max_num_deviants" value="0"/> + <param name="paired_end" value="false"/> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="0"/> + <param name="right_column_offset" value="0"/> + <param name="score_operation" value="min"/> + <param name="score_comparison" value=">="/> + <param name="score" value="-5"/> + <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/> + </test> --> + <!-- No trim, so does not remove Adapter from cssanger --> + <test> + <param name="input_file" value="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="0"/> + <param name="max_quality" value="0"/> + <param name="max_num_deviants" value="0"/> + <param name="paired_end" value="false"/> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="0"/> + <param name="right_column_offset" value="0"/> + <param name="score_operation" value="min"/> + <param name="score_comparison" value=">="/> + <param name="score" value="0"/> + <output name="output_file" file="sanger_full_range_as_cssanger.fastqcssanger" ftype="fastqcssanger" /> + </test> + <!-- Remove all Filter --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="1"/> + <param name="max_quality" value="0"/> + <param name="max_num_deviants" value="0"/> + <param name="paired_end" value="false"/> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="0"/> + <param name="right_column_offset" value="0"/> + <param name="score_operation" value="min"/> + <param name="score_comparison" value=">="/> + <param name="score" value="0"/> + <output name="output_file" file="empty_file.dat" ftype="fastqsanger" /> + </test> + <!-- crippled input types prevent this test <test> + <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="-4"/> + <param name="max_quality" value="0"/> + <param name="max_num_deviants" value="0"/> + <param name="paired_end" value="false"/> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="0"/> + <param name="right_column_offset" value="0"/> + <param name="score_operation" value="min"/> + <param name="score_comparison" value=">="/> + <param name="score" value="-5"/> + <output name="output_file" file="empty_file.dat" ftype="fastqsolexa" /> + </test> --> + <!-- Keep all by allowing 1 deviant --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="1"/> + <param name="max_quality" value="0"/> + <param name="max_num_deviants" value="1"/> + <param name="paired_end" value="false"/> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="0"/> + <param name="right_column_offset" value="0"/> + <param name="score_operation" value="min"/> + <param name="score_comparison" value=">="/> + <param name="score" value="0"/> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- crippled input types prevent this test<test> + <param name="input_file" value="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="-5"/> + <param name="max_quality" value="61"/> + <param name="max_num_deviants" value="1"/> + <param name="paired_end" value="false"/> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="0"/> + <param name="right_column_offset" value="0"/> + <param name="score_operation" value="min"/> + <param name="score_comparison" value=">="/> + <param name="score" value="-5"/> + <output name="output_file" file="solexa_full_range_original_solexa.fastqsolexa" ftype="fastqsolexa" /> + </test> --> + <!-- Filter inner range --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="0"/> + <param name="max_quality" value="0"/> + <param name="max_num_deviants" value="0"/> + <param name="paired_end" value="false"/> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="1"/> + <param name="right_column_offset" value="1"/> + <param name="score_operation" value="min"/> + <param name="score_comparison" value=">="/> + <param name="score" value="1"/> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="0"/> + <param name="max_quality" value="0"/> + <param name="max_num_deviants" value="0"/> + <param name="paired_end" value="false"/> + <param name="base_offset_type" value="offsets_absolute"/> + <param name="left_column_offset" value="1"/> + <param name="right_column_offset" value="1"/> + <param name="score_operation" value="max"/> + <param name="score_comparison" value="<="/> + <param name="score" value="92"/> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + </test> + <!-- percent based offsets --> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="0"/> + <param name="max_quality" value="0"/> + <param name="max_num_deviants" value="0"/> + <param name="paired_end" value="false"/> + <param name="base_offset_type" value="offsets_percent"/> + <param name="left_column_offset" value="1.075"/> + <param name="right_column_offset" value="1.075"/> + <param name="score_operation" value="min"/> + <param name="score_comparison" value=">="/> + <param name="score" value="1"/> + <output name="output_file" file="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger" /> + </test> + <test> + <param name="input_file" value="sanger_full_range_original_sanger.fastqsanger" ftype="fastqsanger"/> + <param name="min_size" value="0"/> + <param name="max_size" value="0"/> + <param name="min_quality" value="0"/> + <param name="max_quality" value="0"/> + <param name="max_num_deviants" value="0"/> + <param name="paired_end" value="true"/> + <param name="base_offset_type" value="offsets_percent"/> + <param name="left_column_offset" value="1"/> + <param name="right_column_offset" value="1"/> + <param name="score_operation" value="min"/> + <param name="score_comparison" value=">="/> + <param name="score" value="1"/> + <output name="output_file" file="empty_file.dat" ftype="fastqsanger" /> + </test> + </tests> + <help><![CDATA[ This tool allows you to build complex filters to be applied to each read in a FASTQ file. **Basic Options:** @@ -297,7 +302,7 @@ * If your data is paired-end, select the proper checkbox; this will cause each read to be internally split down the middle and filters applied to each half using the offsets specified. **Advance Options:** - * You can specify any number of advanced filters. + * You can specify any number of advanced filters. * 5' and 3' offsets are defined, starting at zero, increasing from the respective end of the reads. For example, a quality string of "ABCDEFG", with 5' and 3' offsets of 1 and 1, respectively, specified will yield "BCDEF". * You can specify either absolute offset values, or percentage offset values. *Absolute Values* based offsets are useful for fixed length reads (e.g. Illumina or SOLiD data). *Percentage of Read Length* based offsets are useful for variable length reads (e.g. 454 data). When using the percent-based method, offsets are rounded to the nearest integer. * The user specifies the aggregating action (min, max, sum, mean) to perform on the quality score values found between the specified offsets to be used with the user defined comparison operation and comparison value. @@ -308,14 +313,8 @@ .. class:: warningmark Adapter bases in color space reads are excluded from filtering. - ------- - - -</help> - -<citations> - <citation type="doi">10.1093/bioinformatics/btq281</citation> -</citations> - + ]]></help> + <citations> + <citation type="doi">10.1093/bioinformatics/btq281</citation> + </citations> </tool>
--- a/tool_dependencies.xml Fri Dec 18 19:28:08 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="galaxy_sequence_utils" version="1.0.0"> - <repository changeset_revision="6334612a010e" name="package_galaxy_utils_1_0" owner="devteam" toolshed="https://testtoolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>