Mercurial > repos > devteam > tabular_to_fasta
diff tabular_to_fasta.py @ 1:7f7a1bea4653 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/tabular_to_fasta commit 01140c0ac1a926856c55853a0028e5d44935d9e6"
author | devteam |
---|---|
date | Wed, 05 Feb 2020 15:51:26 +0000 |
parents | db61adc12770 |
children |
line wrap: on
line diff
```diff
--- a/tabular_to_fasta.py Mon May 19 10:59:53 2014 -0400
+++ b/tabular_to_fasta.py Wed Feb 05 15:51:26 2020 +0000
@@ -4,65 +4,66 @@
 Output: fasta
 Return sequences whose lengths are within the range.
 """
-import sys, os
+import os
+import sys
 
-assert sys.version_info[:2] >= ( 2, 4 )
 
-def stop_err( msg ):
-    sys.stderr.write( msg )
-    sys.exit()
+def stop_err(msg):
+    sys.exit(msg)
+
 
 def __main__():
     infile = sys.argv[1]
     title_col = sys.argv[2]
     seq_col = sys.argv[3]
-    outfile = sys.argv[4] 
+    outfile = sys.argv[4]
 
     if title_col == None or title_col == 'None' or seq_col == None or seq_col == 'None':
-        stop_err( "Columns not specified." )
+        stop_err("Columns not specified.")
     try:
-        seq_col = int( seq_col ) - 1
+        seq_col = int(seq_col) - 1
     except:
-        stop_err( "Invalid Sequence Column: %s." %str( seq_col ) )
+        stop_err("Invalid Sequence Column: %s." % str(seq_col))
 
-    title_col_list = title_col.split( ',' )
-    out = open( outfile, 'w' )
+    title_col_list = title_col.split(',')
     skipped_lines = 0
     first_invalid_line = 0
     invalid_line = ""
     i = 0
-    
-    for i, line in enumerate( open( infile ) ):
-        error = False
-        line = line.rstrip( '\r\n' )
-        if line and not line.startswith( '#' ):
-            fields = line.split( '\t' )
-            fasta_title = []
-            for j in title_col_list:
-                try:
-                    j = int( j ) - 1
-                    fasta_title.append( fields[j] )
-                except:
-                    skipped_lines += 1
-                    if not invalid_line:
-                        first_invalid_line = i + 1
-                        invalid_line = line
-                    error = True
-                    break
-            if not error:
-                try:
-                    fasta_seq = fields[seq_col]
-                    if fasta_title[0].startswith( ">" ):
-                        fasta_title[0] = fasta_title[0][1:]
-                    print >> out, ">%s\n%s" % ( "_".join( fasta_title ), fasta_seq )
-                except:
-                    skipped_lines += 1
-                    if not invalid_line:
-                        first_invalid_line = i + 1
-                        invalid_line = line
-    out.close()
+
+    with open(outfile, 'w') as out:
+        for i, line in enumerate(open(infile)):
+            error = False
+            line = line.rstrip('\r\n')
+            if line and not line.startswith('#'):
+                fields = line.split('\t')
+                fasta_title = []
+                for j in title_col_list:
+                    try:
+                        j = int(j) - 1
+                        fasta_title.append(fields[j])
+                    except:
+                        skipped_lines += 1
+                        if not invalid_line:
+                            first_invalid_line = i + 1
+                            invalid_line = line
+                        error = True
+                        break
+                if not error:
+                    try:
+                        fasta_seq = fields[seq_col]
+                        if fasta_title[0].startswith(">"):
+                            fasta_title[0] = fasta_title[0][1:]
+                        print(">%s\n%s" % ("_".join(fasta_title), fasta_seq), file=out)
+                    except:
+                        skipped_lines += 1
+                        if not invalid_line:
+                            first_invalid_line = i + 1
+                            invalid_line = line
 
     if skipped_lines > 0:
-        print 'Data issue: skipped %d blank or invalid lines starting at #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
+        print('Data issue: skipped %d blank or invalid lines starting at #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))
+
 
-if __name__ == "__main__" : __main__()
\ No newline at end of file
+if __name__ == "__main__":
+    __main__()
```