Mercurial > repos > devteam > fasta_to_tabular
comparison fasta_to_tabular.py @ 2:ff4751ce764d draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author | devteam |
---|---|
date | Sun, 01 Mar 2020 12:23:22 +0000 |
parents | ae709fd50581 |
children |
comparison
equal
deleted
inserted
replaced
1:5cabbe4cfaf4 | 2:ff4751ce764d |
---|---|
4 Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description) | 4 Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description) |
5 Output: tabular | 5 Output: tabular |
6 format convert: fasta to tabular | 6 format convert: fasta to tabular |
7 """ | 7 """ |
8 | 8 |
9 import sys, os | 9 import sys |
10 | 10 |
11 def stop_err( msg ): | 11 |
12 sys.stderr.write( msg ) | 12 def stop_err(msg): |
13 sys.exit() | 13 sys.exit(msg) |
14 | |
14 | 15 |
15 def __main__(): | 16 def __main__(): |
16 if len(sys.argv) != 5: | 17 if len(sys.argv) != 5: |
17 stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)") | 18 stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)") |
18 infile = sys.argv[1] | 19 infile = sys.argv[1] |
19 outfile = sys.argv[2] | 20 outfile = sys.argv[2] |
20 keep_first = int( sys.argv[3] ) | 21 keep_first = int(sys.argv[3]) |
21 descr_split = int( sys.argv[4] ) | 22 descr_split = int(sys.argv[4]) |
22 fasta_title = fasta_seq = '' | |
23 if keep_first == 0: | 23 if keep_first == 0: |
24 keep_first = None | 24 keep_first = None |
25 elif descr_split == 1: | 25 elif descr_split == 1: |
26 #Added one for the ">" character | 26 # Added one for the ">" character |
27 #(which is removed if using descr_split > 1) | 27 # (which is removed if using descr_split > 1) |
28 keep_first += 1 | 28 keep_first += 1 |
29 if descr_split < 1: | 29 if descr_split < 1: |
30 stop_err("Bad description split value (should be 1 or more)") | 30 stop_err("Bad description split value (should be 1 or more)") |
31 out = open( outfile, 'w' ) | 31 with open(outfile, 'w') as out, open(infile) as in_fh: |
32 for i, line in enumerate( open( infile ) ): | 32 for i, line in enumerate(in_fh): |
33 line = line.rstrip( '\r\n' ) | 33 line = line.rstrip('\r\n') |
34 if not line or line.startswith( '#' ): | 34 if not line or line.startswith('#'): |
35 continue | 35 continue |
36 if line.startswith( '>' ): | 36 if line.startswith('>'): |
37 #Don't want any existing tabs to trigger extra columns: | 37 # Don't want any existing tabs to trigger extra columns: |
38 line = line.replace('\t', ' ') | 38 line = line.replace('\t', ' ') |
39 if i > 0: | 39 if i > 0: |
40 out.write('\n') | 40 out.write('\n') |
41 if descr_split == 1: | 41 if descr_split == 1: |
42 out.write(line[1:keep_first]) | 42 out.write(line[1:keep_first]) |
43 else: | |
44 words = line[1:].split(None, descr_split - 1) | |
45 # apply any truncation to first word (the id) | |
46 words[0] = words[0][0:keep_first] | |
47 # pad with empty columns if required | |
48 words += [""] * (descr_split - len(words)) | |
49 out.write("\t".join(words)) | |
50 out.write('\t') | |
43 else: | 51 else: |
44 words = line[1:].split(None, descr_split-1) | 52 out.write(line) |
45 #apply any truncation to first word (the id) | 53 if i > 0: |
46 words[0] = words[0][0:keep_first] | 54 out.write('\n') |
47 #pad with empty columns if required | |
48 words += [""]*(descr_split-len(words)) | |
49 out.write("\t".join(words)) | |
50 out.write('\t') | |
51 else: | |
52 out.write(line) | |
53 if i > 0: | |
54 out.write('\n') | |
55 out.close() | |
56 | 55 |
57 if __name__ == "__main__" : __main__() | 56 |
57 if __name__ == "__main__": | |
58 __main__() |