Mercurial > repos > devteam > tabular_to_fasta
changeset 1:7f7a1bea4653 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/tabular_to_fasta commit 01140c0ac1a926856c55853a0028e5d44935d9e6"
author | devteam |
---|---|
date | Wed, 05 Feb 2020 15:51:26 +0000 |
parents | db61adc12770 |
children | |
files | tabular_to_fasta.py tabular_to_fasta.xml |
diffstat | 2 files changed, 83 insertions(+), 75 deletions(-) [+] |
line wrap: on
line diff
--- a/tabular_to_fasta.py Mon May 19 10:59:53 2014 -0400
+++ b/tabular_to_fasta.py Wed Feb 05 15:51:26 2020 +0000
@@ -4,65 +4,66 @@
 Output: fasta
 Return sequences whose lengths are within the range.
 """
-import sys, os
+import os
+import sys
 
-assert sys.version_info[:2] >= ( 2, 4 )
 
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
+def stop_err(msg):
+    sys.exit(msg)
+
 
 def __main__():
     infile = sys.argv[1]
     title_col = sys.argv[2]
     seq_col = sys.argv[3]
-    outfile = sys.argv[4]
+    outfile = sys.argv[4]
 
     if title_col == None or title_col == 'None' or seq_col == None or seq_col == 'None':
-        stop_err( "Columns not specified." )
+        stop_err("Columns not specified.")
     try:
-        seq_col = int( seq_col ) - 1
+        seq_col = int(seq_col) - 1
     except:
-        stop_err( "Invalid Sequence Column: %s." %str( seq_col ) )
+        stop_err("Invalid Sequence Column: %s." % str(seq_col))
 
-    title_col_list = title_col.split( ',' )
-    out = open( outfile, 'w' )
+    title_col_list = title_col.split(',')
     skipped_lines = 0
     first_invalid_line = 0
     invalid_line = ""
     i = 0
-
-    for i, line in enumerate( open( infile ) ):
-        error = False
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
-            fields = line.split( '\t' )
-            fasta_title = []
-            for j in title_col_list:
-                try:
-                    j = int( j ) - 1
-                    fasta_title.append( fields[j] )
-                except:
-                    skipped_lines += 1
-                    if not invalid_line:
-                        first_invalid_line = i + 1
-                        invalid_line = line
-                    error = True
-                    break
-            if not error:
-                try:
-                    fasta_seq = fields[seq_col]
-                    if fasta_title[0].startswith( ">" ):
-                        fasta_title[0] = fasta_title[0][1:]
-                    print >> out, ">%s\n%s" % ( "_".join( fasta_title ), fasta_seq )
-                except:
-                    skipped_lines += 1
-                    if not invalid_line:
-                        first_invalid_line = i + 1
-                        invalid_line = line
-    out.close()
+
+    with open(outfile, 'w') as out:
+        for i, line in enumerate(open(infile)):
+            error = False
+            line = line.rstrip('\r\n')
+            if line and not line.startswith('#'):
+                fields = line.split('\t')
+                fasta_title = []
+                for j in title_col_list:
+                    try:
+                        j = int(j) - 1
+                        fasta_title.append(fields[j])
+                    except:
+                        skipped_lines += 1
+                        if not invalid_line:
+                            first_invalid_line = i + 1
+                            invalid_line = line
+                        error = True
+                        break
+                if not error:
+                    try:
+                        fasta_seq = fields[seq_col]
+                        if fasta_title[0].startswith(">"):
+                            fasta_title[0] = fasta_title[0][1:]
+                        print(">%s\n%s" % ("_".join(fasta_title), fasta_seq), file=out)
+                    except:
+                        skipped_lines += 1
+                        if not invalid_line:
+                            first_invalid_line = i + 1
+                            invalid_line = line
 
     if skipped_lines > 0:
-        print 'Data issue: skipped %d blank or invalid lines starting at #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
+        print('Data issue: skipped %d blank or invalid lines starting at #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))
+
 
-if __name__ == "__main__" : __main__()
\ No newline at end of file
+if __name__ == "__main__":
+    __main__()
--- a/tabular_to_fasta.xml Mon May 19 10:59:53 2014 -0400
+++ b/tabular_to_fasta.xml Wed Feb 05 15:51:26 2020 +0000
@@ -1,43 +1,50 @@
-<tool id="tab2fasta" name="Tabular-to-FASTA" version="1.1.0">
-  <description>converts tabular file to FASTA format</description>
-  <command interpreter="python">tabular_to_fasta.py $input $title_col $seq_col $output </command>
-  <inputs>
-    <param name="input" type="data" format="tabular" label="Tab-delimited file"/>
-    <param name="title_col" type="data_column" data_ref="input" multiple="True" numerical="False" label="Title column(s)" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"/>
-    <param name="seq_col" type="data_column" data_ref="input" numerical="False" label="Sequence column" />
-  </inputs>
-  <outputs>
-    <data name="output" format="fasta"/>
-  </outputs>
-  <tests>
-    <test>
-      <param name="input" value="solexa.tabular" />
-      <param name="title_col" value="1,2,3,4" />
-      <param name="seq_col" value="5" />
-      <output name="output" file="tabular_to_fasta_out1.fasta" />
-    </test>
-  </tests>
-  <help>
-
+<tool id="tab2fasta" name="Tabular-to-FASTA" version="1.1.1" profile="16.04">
+    <description>converts tabular file to FASTA format</description>
+    <requirements>
+        <requirement type="package" version="3.7">python</requirement>
+    </requirements>
+    <command><![CDATA[
+python '$__tool_directory__/tabular_to_fasta.py'
+'$input'
+$title_col
+$seq_col
+'$output'
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="Tab-delimited file"/>
+        <param name="title_col" type="data_column" data_ref="input" multiple="true" numerical="false" label="Title column(s)" help="Multi-select list - hold the appropriate key while clicking to select multiple columns"/>
+        <param name="seq_col" type="data_column" data_ref="input" numerical="false" label="Sequence column" />
+    </inputs>
+    <outputs>
+        <data name="output" format="fasta"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="solexa.tabular" />
+            <param name="title_col" value="1,2,3,4" />
+            <param name="seq_col" value="5" />
+            <output name="output" file="tabular_to_fasta_out1.fasta" />
+        </test>
+    </tests>
+    <help><![CDATA[
 **What it does**
 
 Converts tab delimited data into FASTA formatted sequences.
 
 -----------
-
+
 **Example**
 
 Suppose this is a sequence file produced by Illumina (Solexa) sequencer::
 
-    5 300 902 419 GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
-    5 300 880 431 GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
-
+    5 300 902 419 GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
+    5 300 880 431 GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
+
 Selecting **c3** and **c4** as the **Title column(s)** and **c5** as the **Sequence column** will result in::
 
-    >902_419
-    GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
-    >880_431
-    GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
-
-  </help>
-</tool>
\ No newline at end of file
+    >902_419
+    GACTCATGATTTCTTACCTATTAGTGGTTGAACATC
+    >880_431
+    GTGATATGTATGTTGACGGCCATAAGGCTGCTTCTT
+    ]]></help>
+</tool>