Mercurial > repos > devteam > fasta_to_tabular
diff fasta_to_tabular.py @ 0:ae709fd50581 draft
Imported from capsule None
author | devteam |
---|---|
date | Mon, 19 May 2014 11:00:01 -0400 |
parents | |
children | ff4751ce764d |
line wrap: on
line diff
#!/usr/bin/env python
# This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools
"""
Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description)
Output: tabular
format convert: fasta to tabular
"""

import os
import sys


def stop_err(msg):
    """Write *msg* to stderr and terminate the process with exit status 1.

    Raises:
        SystemExit: always, with code 1.
    """
    sys.stderr.write(msg)
    # A bare sys.exit() reports status 0, i.e. success, to the caller.
    sys.exit(1)


def _title_columns(line, keep_first, descr_split):
    """Return the tab-joined title column(s) for one FASTA header *line*.

    *line* is expected to start with ">". *keep_first* is the slice end used
    to truncate the identifier (None disables truncation) and *descr_split*
    is the number of columns the title is split into (1 or more).
    """
    # Don't want any existing tabs to trigger extra columns:
    line = line.replace('\t', ' ')
    if descr_split == 1:
        # In this mode the caller has already added one to keep_first to
        # account for the leading ">" that the slice skips.
        return line[1:keep_first]
    words = line[1:].split(None, descr_split - 1)
    # Pad with empty columns if required. Padding first guarantees that
    # words[0] exists even for a bare ">" header, where split() yields [].
    words += [""] * (descr_split - len(words))
    # Apply any truncation to the first word (the id).
    words[0] = words[0][0:keep_first]
    return "\t".join(words)


def _convert(lines, out, keep_first, descr_split):
    """Write one tabular row per FASTA record read from *lines* to *out*.

    Blank lines and lines starting with "#" are skipped. Sequence lines are
    concatenated into the last column of the current row.
    """
    # Track real output rather than the input line number, so that leading
    # comments/blank lines do not produce an empty first row and a one-line
    # input still gets its terminating newline.
    wrote_output = False
    for line in lines:
        line = line.rstrip('\r\n')
        if not line or line.startswith('#'):
            continue
        if line.startswith('>'):
            if wrote_output:
                # Terminate the previous record before starting a new row.
                out.write('\n')
            out.write(_title_columns(line, keep_first, descr_split))
            out.write('\t')
        else:
            out.write(line)
        wrote_output = True
    if wrote_output:
        out.write('\n')


def __main__():
    """Convert the FASTA file named in sys.argv into a tabular file.

    Expects exactly four command-line arguments: input FASTA path, output
    tabular path, identifier truncation length (0 means no truncation) and
    the number of columns to split the title line into (1 or more).

    Raises:
        SystemExit: via stop_err() on a wrong argument count or a
            description split value below 1.
        ValueError: if either numeric argument is not an integer.
    """
    if len(sys.argv) != 5:
        stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first = int(sys.argv[3])
    descr_split = int(sys.argv[4])
    if descr_split < 1:
        stop_err("Bad description split value (should be 1 or more)")
    if keep_first == 0:
        keep_first = None
    elif descr_split == 1:
        # Added one for the ">" character
        # (which is removed if using descr_split > 1)
        keep_first += 1
    # Context managers close both handles even if conversion fails.
    with open(outfile, 'w') as out:
        with open(infile) as fasta:
            _convert(fasta, out, keep_first, descr_split)


if __name__ == "__main__":
    __main__()