annotate utils/fasta_to_len.py @ 4:79cd53e23207 draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author devteam
date Sun, 01 Mar 2020 12:22:12 +0000
parents 86f00ebe7be7
children a51da10f8caf
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
2 """
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
3 Input: fasta, int
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
4 Output: tabular
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
5 Return titles with lengths of corresponding seq
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
6 """
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
7
4
79cd53e23207 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
8 import sys
0
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
9
4
79cd53e23207 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
# The multi-item ``with`` statement used in this module needs Python 2.7+,
# so that is the real minimum version. Raise explicitly instead of using
# ``assert``, which is stripped when Python runs with -O.
if sys.version_info[:2] < (2, 7):
    raise RuntimeError("fasta_to_len requires Python 2.7 or newer")
0
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
11
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
12
4
79cd53e23207 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False):
    """
    Write one ``title<TAB>length`` row to out_file per record in fasta_file.

    fasta_file: path of the FASTA file to read.
    out_file: path of the tabular file to write (created or truncated).
    keep_first_char: number of title characters to keep, as an int or
        anything int() accepts; 0 keeps the whole title.
    keep_first_word: when true, the title is cut at its first whitespace
        before the character limit is applied.

    Blank lines and lines starting with '#' are ignored. Each sequence line
    is stripped of surrounding whitespace before its length is counted.
    Sequence lines that appear before the first title line are discarded.
    Input with neither a title line nor any sequence produces no rows.

    Raises ValueError if keep_first_char cannot be converted to int.
    """
    keep_first_char = int(keep_first_char)
    # The stored title still carries its leading '>' when it is sliced, so
    # the slice end is one past the requested count; None means "no limit".
    title_end = keep_first_char + 1 if keep_first_char else None

    fasta_title = ''
    seq_len = 0
    seen_title = False
    with open(fasta_file) as in_fh, open(out_file, 'w') as out_fh:
        for line in in_fh:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            if line.startswith('>'):
                # A new title closes the previous record, if there was one.
                if seen_title:
                    out_fh.write(_format_row(fasta_title, seq_len, title_end, keep_first_word))
                seen_title = True
                fasta_title = line
                seq_len = 0
            else:
                seq_len += len(line)

        # Last record. Skip it when the input held no record at all, so an
        # empty file yields an empty table instead of a bogus "\t0" row.
        if seen_title or seq_len:
            out_fh.write(_format_row(fasta_title, seq_len, title_end, keep_first_word))


def _format_row(title, seq_len, title_end, first_word_only):
    """Return the tabular row for one record; title includes its leading '>'."""
    if first_word_only:
        # title is empty when sequence data had no title line; guard the
        # index so that case does not raise IndexError.
        words = title.split()
        title = words[0] if words else ''
    return "%s\t%d\n" % (title[1:title_end], seq_len)
0
86f00ebe7be7 Imported from capsule None
devteam
parents:
diff changeset
46
4
79cd53e23207 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
47
79cd53e23207 "planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
devteam
parents: 0
diff changeset
if __name__ == "__main__":
    # Command line: <fasta_file> <out_file> <keep_first_char>
    if len(sys.argv) < 4:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit("Usage: %s <fasta_file> <out_file> <keep_first_char>" % sys.argv[0])
    # When run as a script, titles are always reduced to their first word.
    compute_fasta_length(sys.argv[1], sys.argv[2], sys.argv[3], True)