Mercurial > repos > cpt_testbed > suite_work2
comparison gff3_require_phage_start.py @ 0:d5c3354c166d draft default tip
Uploaded
| author | cpt_testbed |
|---|---|
| date | Fri, 29 Apr 2022 10:33:36 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d5c3354c166d |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import sys | |
| 3 import argparse | |
| 4 import logging | |
| 5 from Bio import SeqIO | |
| 6 from CPT_GFFParser import gffParse, gffWrite | |
| 7 from gff3 import feature_lambda, feature_test_type | |
| 8 | |
| 9 logging.basicConfig(level=logging.INFO) | |
| 10 log = logging.getLogger(__name__) | |
| 11 | |
| 12 | |
| 13 def require_shinefind(gff3, fasta): | |
| 14 # Load up sequence(s) for GFF3 data | |
| 15 seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta")) | |
| 16 # Parse GFF3 records | |
| 17 for record in gffParse(gff3, base_dict=seq_dict): | |
| 18 # Reopen | |
| 19 genes = list( | |
| 20 feature_lambda( | |
| 21 record.features, feature_test_type, {"type": "gene"}, subfeatures=True | |
| 22 ) | |
| 23 ) | |
| 24 good_genes = [] | |
| 25 for gene in genes: | |
| 26 cdss = list( | |
| 27 feature_lambda( | |
| 28 gene.sub_features, | |
| 29 feature_test_type, | |
| 30 {"type": "CDS"}, | |
| 31 subfeatures=False, | |
| 32 ) | |
| 33 ) | |
| 34 if len(cdss) == 0: | |
| 35 continue | |
| 36 | |
| 37 one_good_cds = False | |
| 38 for cds in cdss: | |
| 39 if cds.extract(record).seq[0:3].upper() in ("GTG", "ATG", "TTG"): | |
| 40 one_good_cds = True | |
| 41 | |
| 42 if one_good_cds: | |
| 43 good_genes.append(gene) | |
| 44 record.features = good_genes | |
| 45 record.annotations = {} | |
| 46 yield record | |
| 47 | |
| 48 | |
| 49 if __name__ == "__main__": | |
| 50 parser = argparse.ArgumentParser(description="Require specific start codons") | |
| 51 parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome") | |
| 52 parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations") | |
| 53 args = parser.parse_args() | |
| 54 | |
| 55 for rec in require_shinefind(**vars(args)): | |
| 56 gffWrite([rec], sys.stdout) |
