Mercurial > repos > devteam > fasta_compute_length
changeset 6:96c921261e41 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fasta_compute_length commit 9f0b8eb9fd7cf9e2513e7c822767153dbd4bc3b2
author | devteam |
---|---|
date | Thu, 03 Oct 2024 12:11:43 +0000 |
parents | a51da10f8caf |
children | |
files | fasta_compute_length.py fasta_compute_length.xml utils/__init__.py utils/fasta_to_len.py |
diffstat | 3 files changed, 45 insertions(+), 52 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/fasta_compute_length.py Mon Mar 02 11:45:47 2020 +0000
+++ b/fasta_compute_length.py Thu Oct 03 12:11:43 2024 +0000
@@ -1,10 +1,47 @@
 #!/usr/bin/env python
 """
-Uses fasta_to_len converter code.
+Input: fasta, int
+Output: tabular
+Return titles with lengths of corresponding seq
 """
 
 import sys
 
-from utils.fasta_to_len import compute_fasta_length
 
-compute_fasta_length(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4] == 'id_only')
+def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False):
+    keep_first_char = int(keep_first_char)
+    fasta_title = ''
+    seq_len = 0
+
+    # number of char to keep in the title
+    if keep_first_char == 0:
+        keep_first_char = None
+    else:
+        keep_first_char += 1
+
+    first_entry = True
+    with open(fasta_file) as in_fh, open(out_file, 'w') as out_fh:
+        for line in in_fh:
+            line = line.strip()
+            if not line or line.startswith('#'):
+                continue
+            if line[0] == '>':
+                if first_entry is False:
+                    if keep_first_word:
+                        fasta_title = fasta_title.split()[0]
+                    out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len))
+                else:
+                    first_entry = False
+                fasta_title = line
+                seq_len = 0
+            else:
+                seq_len += len(line)
+
+        # last fasta-entry
+        if keep_first_word:
+            fasta_title = fasta_title.split()[0]
+        out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len))
+
+
+if __name__ == "__main__":
+    compute_fasta_length(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4] == 'id_only')
```
```diff
--- a/fasta_compute_length.xml Mon Mar 02 11:45:47 2020 +0000
+++ b/fasta_compute_length.xml Thu Oct 03 12:11:43 2024 +0000
@@ -1,8 +1,11 @@
-<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.3" profile="16.04">
+<tool id="fasta_compute_length" name="Compute sequence length" version="1.0.4" profile="22.04">
     <description></description>
     <requirements>
-        <requirement type="package" version="3.7">python</requirement>
+        <requirement type="package" version="3.12">python</requirement>
     </requirements>
+    <required_files>
+        <include path="fasta_compute_length.py"/>
+    </required_files>
     <command>
         #if $ref.ref_source == 'dbkey':
             cp '${ref.index.fields.len_path}' '$output'
```
```diff
--- a/utils/fasta_to_len.py Mon Mar 02 11:45:47 2020 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-#!/usr/bin/env python
-"""
-Input: fasta, int
-Output: tabular
-Return titles with lengths of corresponding seq
-"""
-
-import sys
-
-
-def compute_fasta_length(fasta_file, out_file, keep_first_char, keep_first_word=False):
-    keep_first_char = int(keep_first_char)
-    fasta_title = ''
-    seq_len = 0
-
-    # number of char to keep in the title
-    if keep_first_char == 0:
-        keep_first_char = None
-    else:
-        keep_first_char += 1
-
-    first_entry = True
-    with open(fasta_file) as in_fh, open(out_file, 'w') as out_fh:
-        for line in in_fh:
-            line = line.strip()
-            if not line or line.startswith('#'):
-                continue
-            if line[0] == '>':
-                if first_entry is False:
-                    if keep_first_word:
-                        fasta_title = fasta_title.split()[0]
-                    out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len))
-                else:
-                    first_entry = False
-                fasta_title = line
-                seq_len = 0
-            else:
-                seq_len += len(line)
-
-        # last fasta-entry
-        if keep_first_word:
-            fasta_title = fasta_title.split()[0]
-        out_fh.write("%s\t%d\n" % (fasta_title[1:keep_first_char], seq_len))
-
-
-if __name__ == "__main__":
-    compute_fasta_length(sys.argv[1], sys.argv[2], sys.argv[3], True)
```