Mercurial > repos > devteam > fasta_compute_length
view utils/fasta_to_len.py @ 4:79cd53e23207 draft
"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit cd1ed08574b749eee2a3f6e6151dbb0c8ca15bbf"
author | devteam |
---|---|
date | Sun, 01 Mar 2020 12:22:12 +0000 |
parents | 86f00ebe7be7 |
children | a51da10f8caf |
line wrap: on
line source
#!/usr/bin/env python
"""
Input: fasta, int
Output: tabular
Return titles with lengths of corresponding seq
"""

import sys

assert sys.version_info[:2] >= (2, 4)


def _write_entry(out_fh, fasta_title, seq_len, title_end, keep_first_word):
    """Write one ``title<TAB>length`` row for a completed FASTA record.

    ``fasta_title`` is the raw header line including its leading '>'.
    ``title_end`` is the exclusive slice end applied to that raw line
    (``None`` keeps the whole title).
    """
    if keep_first_word:
        fasta_title = fasta_title.split()[0]
    # Index 0 is the '>' marker, so the visible title starts at index 1.
    out_fh.write("%s\t%d\n" % (fasta_title[1:title_end], seq_len))


def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False):
    """Write a tabular file of FASTA titles and their sequence lengths.

    Each record in ``fasta_file`` produces one ``title<TAB>length`` line in
    ``out_file``. Blank lines and lines starting with '#' are skipped. The
    length is the summed length of the whitespace-stripped sequence lines
    following a header. Sequence lines that appear before the first header
    are not reported.

    Parameters:
        fasta_file: path of the FASTA file to read.
        out_file: path of the tabular file to write (overwritten).
        keep_first_char: number of leading title characters to keep (an int
            or anything ``int()`` accepts); 0 keeps the whole title.
        keep_first_word: if true, cut the header line down to its first
            whitespace-delimited word before the character limit is applied.

    If the input contains no header line at all, the output file is created
    empty instead of failing on (or emitting a row for) a non-existent record.
    """
    keep_first_char = int(keep_first_char)
    # The slice is taken on the raw header line, whose index 0 is '>', so the
    # end index is shifted by one. None means "no limit".
    title_end = keep_first_char + 1 if keep_first_char else None

    fasta_title = None  # None until the first header line has been seen
    seq_len = 0
    with open(fasta_file) as in_fh, open(out_file, 'w') as out_fh:
        for line in in_fh:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            if line[0] == '>':
                # A new header closes the previous record, if there was one.
                if fasta_title is not None:
                    _write_entry(out_fh, fasta_title, seq_len, title_end, keep_first_word)
                fasta_title = line
                seq_len = 0
            else:
                seq_len += len(line)
        # Flush the last record; skipped when the file held no records.
        if fasta_title is not None:
            _write_entry(out_fh, fasta_title, seq_len, title_end, keep_first_word)


if __name__ == "__main__":
    compute_fasta_length(sys.argv[1], sys.argv[2], sys.argv[3], True)