Mercurial > repos > devteam > fasta_compute_length
comparison fasta_compute_length.py @ 6:96c921261e41 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit 9f0b8eb9fd7cf9e2513e7c822767153dbd4bc3b2
author | devteam |
---|---|
date | Thu, 03 Oct 2024 12:11:43 +0000 |
parents | 79cd53e23207 |
children |
comparison
equal
deleted
inserted
replaced
5:a51da10f8caf | 6:96c921261e41 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 """ | 2 """ |
3 Uses fasta_to_len converter code. | 3 Input: fasta, int |
| | 4 Output: tabular |
| | 5 Return titles with lengths of corresponding seq |
4 """ | 6 """ |
5 | 7 |
6 import sys | 8 import sys |
7 from utils.fasta_to_len import compute_fasta_length | |
8 | 9 |
9 | 10 |
10 compute_fasta_length(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4] == 'id_only') | 11 def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False): |
| | 12 keep_first_char = int(keep_first_char) |
| | 13 fasta_title = '' |
| | 14 seq_len = 0 |
| | 15 |
| | 16 # number of char to keep in the title |
| | 17 if keep_first_char == 0: |
| | 18 keep_first_char = None |
| | 19 else: |
| | 20 keep_first_char += 1 |
| | 21 |
| | 22 first_entry = True |
| | 23 with open(fasta_file) as in_fh, open(out_file, 'w') as out_fh: |
| | 24 for line in in_fh: |
| | 25 line = line.strip() |
| | 26 if not line or line.startswith('#'): |
| | 27 continue |
| | 28 if line[0] == '>': |
| | 29 if first_entry is False: |
| | 30 if keep_first_word: |
| | 31 fasta_title = fasta_title.split()[0] |
| | 32 out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len)) |
| | 33 else: |
| | 34 first_entry = False |
| | 35 fasta_title = line |
| | 36 seq_len = 0 |
| | 37 else: |
| | 38 seq_len += len(line) |
| | 39 |
| | 40 # last fasta-entry |
| | 41 if keep_first_word: |
| | 42 fasta_title = fasta_title.split()[0] |
| | 43 out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len)) |
| | 44 |
| | 45 |
| | 46 if __name__ == "__main__": |
| | 47 compute_fasta_length(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4] == 'id_only') |