Mercurial > repos > devteam > tabular_to_fasta
view tabular_to_fasta.py @ 1:7f7a1bea4653 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/tabular_to_fasta commit 01140c0ac1a926856c55853a0028e5d44935d9e6"
author | devteam |
---|---|
date | Wed, 05 Feb 2020 15:51:26 +0000 |
parents | db61adc12770 |
children |
line wrap: on
line source
#!/usr/bin/env python
"""
Convert a tab-delimited file into FASTA.

Usage: tabular_to_fasta.py <infile> <title_cols> <seq_col> <outfile>

Input: tabular file, comma-separated 1-based title column number(s),
       1-based sequence column number
Output: fasta

Every non-empty line that does not start with '#' becomes one FASTA record:
the header is the selected title fields joined with "_" and the body is the
sequence field. Lines lacking one of the requested columns are skipped and
summarised on stdout once the conversion is finished.
"""
import os
import sys


def stop_err(msg):
    """Abort the program through ``sys.exit``, handing it *msg*."""
    sys.exit(msg)


def _parse_title_columns(spec):
    """Turn a comma-separated list of 1-based columns into 0-based indices.

    Returns None when any entry is not an integer, so that the caller can
    treat every data line as invalid instead of aborting the run.
    """
    try:
        return [int(col) - 1 for col in spec.split(',')]
    except ValueError:
        return None


def _build_record(fields, title_indices, seq_index):
    """Return the FASTA text for one row, or None if a column is missing.

    fields        -- the tab-split values of one input line
    title_indices -- 0-based indices of the fields forming the header
    seq_index     -- 0-based index of the field holding the sequence
    """
    # NOTE(review): a column given as 0 (or negative) yields a negative index
    # here and therefore counts from the end of the row rather than being
    # rejected - confirm whether callers can ever pass such a value.
    try:
        title_parts = [fields[k] for k in title_indices]
        sequence = fields[seq_index]
    except IndexError:
        return None
    # Avoid emitting ">>" when the title field already carries the marker.
    if title_parts[0].startswith(">"):
        title_parts[0] = title_parts[0][1:]
    return ">%s\n%s" % ("_".join(title_parts), sequence)


def __main__():
    """Read the four command-line arguments and perform the conversion.

    sys.argv[1] -- path of the tabular input file
    sys.argv[2] -- comma-separated 1-based title column number(s)
    sys.argv[3] -- 1-based sequence column number
    sys.argv[4] -- path of the FASTA file to write

    Exits through ``stop_err`` when a column argument is the string 'None'
    or the sequence column is not an integer. When lines were skipped, a
    one-line summary naming the first of them is printed to stdout.
    """
    infile = sys.argv[1]
    title_col = sys.argv[2]
    seq_col = sys.argv[3]
    outfile = sys.argv[4]

    if title_col is None or title_col == 'None' or seq_col is None or seq_col == 'None':
        stop_err("Columns not specified.")
    try:
        seq_index = int(seq_col) - 1
    except ValueError:
        stop_err("Invalid Sequence Column: %s." % str(seq_col))
    # Parsed once up front; the column specification does not vary per line.
    title_indices = _parse_title_columns(title_col)

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = ""
    # Both handles are managed by the with-statement so that the input file is
    # closed deterministically as well as the output file.
    with open(outfile, 'w') as out, open(infile) as src:
        for line_no, line in enumerate(src, 1):
            line = line.rstrip('\r\n')
            # Blank lines and '#' comment lines are ignored, not counted.
            if not line or line.startswith('#'):
                continue
            if title_indices is None:
                record = None
            else:
                record = _build_record(line.split('\t'), title_indices, seq_index)
            if record is None:
                skipped_lines += 1
                # Only the first offending line is remembered for the report.
                if not invalid_line:
                    first_invalid_line = line_no
                    invalid_line = line
                continue
            print(record, file=out)

    if skipped_lines > 0:
        print('Data issue: skipped %d blank or invalid lines starting at #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))


if __name__ == "__main__":
    __main__()