Mercurial > repos > devteam > fasta_to_tabular
annotate fasta_to_tabular.py @ 3:5b998d13de71 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit 34a6c9f94a5722bb7d2f887618aafa410a770e91"
| author | devteam |
|---|---|
| date | Mon, 02 Mar 2020 11:46:30 +0000 |
| parents | ff4751ce764d |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 # This code exists in 2 places: ~/datatypes/converters and ~/tools/fasta_tools | |
| 3 """ | |
| 4 Input: fasta (input file), tabular (output file), int (truncation of id), int (columns from description) | |
| 5 Output: tabular | |
| 6 format convert: fasta to tabular | |
| 7 """ | |
| 8 | |
|
2
ff4751ce764d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
9 import sys |
|
ff4751ce764d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
10 |
| 0 | 11 |
|
2
ff4751ce764d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
def stop_err(msg):
    """Terminate the script by raising ``SystemExit`` carrying ``msg``."""
    raise SystemExit(msg)
|
ff4751ce764d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
14 |
| 0 | 15 |
def __main__():
    """Convert a FASTA file into tab-separated text, one record per row.

    Arguments are read from ``sys.argv``:
        1. path of the FASTA input file
        2. path of the tabular output file
        3. truncation length for the identifier (0 means no truncation);
           with a single title column the whole title line is truncated,
           otherwise only its first word is
        4. number of columns to split the title line into (1 or more)

    Every output row holds the title column(s), a tab, and the record's
    sequence lines concatenated.  Blank lines and lines starting with ``#``
    are skipped.  Calls ``stop_err`` when the argument count is wrong or the
    column count is below 1.
    """
    if len(sys.argv) != 5:
        stop_err("Wrong number of argument. Expect four (fasta, tabular, truncation, columns)")
    infile = sys.argv[1]
    outfile = sys.argv[2]
    keep_first = int(sys.argv[3])
    descr_split = int(sys.argv[4])
    # Validate the column count before it influences anything else.
    if descr_split < 1:
        stop_err("Bad description split value (should be 1 or more)")
    if keep_first == 0:
        # A slice end of None keeps the full text.
        keep_first = None
    elif descr_split == 1:
        # Added one for the ">" character
        # (which is removed if using descr_split > 1)
        keep_first += 1
    # Tracks whether anything has been written yet.  The line index cannot be
    # used for this: skipped blank/comment lines would make the first header
    # look like a later one, and an empty input never binds the index at all.
    wrote_any = False
    with open(outfile, 'w') as out, open(infile) as in_fh:
        for line in in_fh:
            line = line.rstrip('\r\n')
            if not line or line.startswith('#'):
                continue
            if line.startswith('>'):
                # Don't want any existing tabs to trigger extra columns:
                line = line.replace('\t', ' ')
                if wrote_any:
                    # Close the previous row before starting a new record.
                    out.write('\n')
                if descr_split == 1:
                    out.write(line[1:keep_first])
                else:
                    # A bare ">" splits into an empty list; fall back to one
                    # empty identifier so indexing below cannot fail.
                    words = line[1:].split(None, descr_split - 1) or [""]
                    # apply any truncation to first word (the id)
                    words[0] = words[0][0:keep_first]
                    # pad with empty columns if required
                    words += [""] * (descr_split - len(words))
                    out.write("\t".join(words))
                out.write('\t')
            else:
                out.write(line)
            wrote_any = True
        if wrote_any:
            # Terminate the final row; nothing is written for empty input.
            out.write('\n')
| 0 | 55 |
|
2
ff4751ce764d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
56 |
|
ff4751ce764d
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_to_tabular commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents:
0
diff
changeset
|
# Run the conversion only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
