Mercurial > repos > devteam > fasta_filter_by_length
changeset 4:f8a4ac568c07 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_filter_by_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author | devteam |
---|---|
date | Sun, 01 Mar 2020 12:22:57 +0000 |
parents | 70e730a51307 |
children | |
files | fasta_filter_by_length.py fasta_filter_by_length.xml |
diffstat | 2 files changed, 80 insertions(+), 79 deletions(-) [+] |
line wrap: on
line diff
--- a/fasta_filter_by_length.py Tue Dec 17 18:13:11 2019 +0000 +++ b/fasta_filter_by_length.py Sun Mar 01 12:22:57 2020 +0000 @@ -5,48 +5,50 @@ Return sequences whose lengths are within the range. """ -import sys, os +import sys -assert sys.version_info[:2] >= ( 2, 4 ) +assert sys.version_info[:2] >= (2, 4) -def stop_err( msg ): - sys.stderr.write( msg ) - sys.exit() + +def stop_err(msg): + sys.exit(msg) + def __main__(): input_filename = sys.argv[1] try: - min_length = int( sys.argv[2] ) - except: - stop_err( "Minimal length of the return sequence requires a numerical value." ) + min_length = int(sys.argv[2]) + except Exception: + stop_err("Minimal length of the return sequence requires a numerical value.") try: - max_length = int( sys.argv[3] ) - except: - stop_err( "Maximum length of the return sequence requires a numerical value." ) + max_length = int(sys.argv[3]) + except Exception: + stop_err("Maximum length of the return sequence requires a numerical value.") output_filename = sys.argv[4] - output_handle = open( output_filename, 'w' ) - tmp_size = 0 #-1 + tmp_size = 0 # -1 tmp_buf = '' at_least_one = 0 - for line in file(input_filename): - if not line or line.startswith('#'): - continue - if line[0] == '>': - if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): - output_handle.write(tmp_buf) - at_least_one = 1 - tmp_buf = line - tmp_size = 0 - else: - if max_length == 0 or tmp_size <= max_length: - tmp_size += len(line.rstrip('\r\n')) - tmp_buf += line - # final flush of buffer - if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): - output_handle.write(tmp_buf.rstrip('\r\n')) - at_least_one = 1 - output_handle.close() + with open(output_filename, 'w') as output_handle, open(input_filename, 'r') as input_handle: + for line in input_handle: + if not line or line.startswith('#'): + continue + if line[0] == '>': + if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): + output_handle.write(tmp_buf) + at_least_one = 1 + tmp_buf = line + tmp_size = 0 + else: + if max_length == 0 or tmp_size <= max_length: + tmp_size += len(line.rstrip('\r\n')) + tmp_buf += line + # final flush of buffer + if min_length <= tmp_size <= max_length or (min_length <= tmp_size and max_length == 0): + output_handle.write(tmp_buf.rstrip('\r\n')) + at_least_one = 1 if at_least_one == 0: - print "There is no sequence that falls within your range." + print("There is no sequence that falls within your range.") + -if __name__ == "__main__" : __main__() +if __name__ == "__main__": + __main__()
--- a/fasta_filter_by_length.xml Tue Dec 17 18:13:11 2019 +0000 +++ b/fasta_filter_by_length.xml Sun Mar 01 12:22:57 2020 +0000 @@ -1,35 +1,34 @@ -<tool id="fasta_filter_by_length" name="Filter sequences by length" version="1.1"> - <description></description> - <command> - python $__tool_directory__/fasta_filter_by_length.py - '$input' - $min_length - $max_length - '$output' - </command> - <inputs> - <param name="input" type="data" format="fasta" label="Fasta file"/> - <param name="min_length" type="integer" value="0" label="Minimal length" /> - <param name="max_length" type="integer" value="0" label="Maximum length" help="Setting to '0' will return all sequences longer than the 'Minimal length'"/> - </inputs> - <outputs> - <data name="output" format="fasta"/> - </outputs> - <tests> - <test> - <param name="input" value="454.fasta" /> - <param name="min_length" value="10" /> - <param name="max_length" value="0" /> - <output name="output" file="fasta_tool_filter_length_1.out" /> - </test> - <test> - <param name="input" value="4.fasta" /> - <param name="min_length" value="0" /> - <param name="max_length" value="60" /> - <output name="output" file="fasta_tool_filter_length_2.out" /> - </test> - </tests> - <help> +<tool id="fasta_filter_by_length" name="Filter sequences by length" version="1.2" profile="16.04"> + <description></description> + <requirements> + <requirement type="package" version="3.7">python</requirement> + </requirements> + <command> +python '$__tool_directory__/fasta_filter_by_length.py' '$input' $min_length $max_length '$output' + </command> + <inputs> + <param name="input" type="data" format="fasta" label="Fasta file"/> + <param name="min_length" type="integer" value="0" label="Minimal length" /> + <param name="max_length" type="integer" value="0" label="Maximum length" help="Setting to '0' will return all sequences longer than the 'Minimal length'"/> + </inputs> + <outputs> + <data name="output" format="fasta"/> + </outputs> + <tests> + <test> + <param name="input" value="454.fasta" /> + <param name="min_length" value="10" /> + <param name="max_length" value="0" /> + <output name="output" file="fasta_tool_filter_length_1.out" /> + </test> + <test> + <param name="input" value="4.fasta" /> + <param name="min_length" value="0" /> + <param name="max_length" value="60" /> + <output name="output" file="fasta_tool_filter_length_2.out" /> + </test> + </tests> + <help><![CDATA[ .. class:: infomark @@ -38,7 +37,7 @@ ----- **What it does** - + Outputs sequences greater than or equal to *Minimal length* and less than or equal to *Maximum length*. ----- @@ -47,22 +46,22 @@ Suppose you have the following FASTA formatted sequences:: - >seq1 - TCATTTAATGAC - >seq2 - ATGGC - >seq3 - TCACATGATGCCG - >seq4 - ATGGAAGC + >seq1 + TCATTTAATGAC + >seq2 + ATGGC + >seq3 + TCACATGATGCCG + >seq4 + ATGGAAGC Setting the **Minimal length** to **10**, and the **Maximum length** to **0** will return all sequences longer than 10 bp:: - >seq1 - TCATTTAATGAC - >seq3 - TCACATGATGCCG + >seq1 + TCATTTAATGAC + >seq3 + TCACATGATGCCG - </help> + ]]></help> </tool>