0
|
1 #!/usr/bin/env python
|
|
2 import sys
|
|
3 import argparse
|
|
4 import logging
|
|
5 from Bio import SeqIO
|
|
6 from CPT_GFFParser import gffParse, gffWrite
|
|
7 from gff3 import feature_lambda, feature_test_type
|
|
8
|
|
9 logging.basicConfig(level=logging.INFO)
|
|
10 log = logging.getLogger(__name__)
|
|
11
|
|
12
|
|
13 def require_shinefind(gff3, fasta):
|
|
14 # Load up sequence(s) for GFF3 data
|
|
15 seq_dict = SeqIO.to_dict(SeqIO.parse(fasta, "fasta"))
|
|
16 # Parse GFF3 records
|
|
17 for record in gffParse(gff3, base_dict=seq_dict):
|
|
18 # Reopen
|
|
19 genes = list(
|
|
20 feature_lambda(
|
|
21 record.features, feature_test_type, {"type": "gene"}, subfeatures=True
|
|
22 )
|
|
23 )
|
|
24 good_genes = []
|
|
25 for gene in genes:
|
|
26 cdss = list(
|
|
27 feature_lambda(
|
|
28 gene.sub_features,
|
|
29 feature_test_type,
|
|
30 {"type": "CDS"},
|
|
31 subfeatures=False,
|
|
32 )
|
|
33 )
|
|
34 if len(cdss) == 0:
|
|
35 continue
|
|
36
|
|
37 one_good_cds = False
|
|
38 for cds in cdss:
|
|
39 if cds.extract(record).seq[0:3].upper() in ("GTG", "ATG", "TTG"):
|
|
40 one_good_cds = True
|
|
41
|
|
42 if one_good_cds:
|
|
43 good_genes.append(gene)
|
|
44 record.features = good_genes
|
|
45 record.annotations = {}
|
|
46 yield record
|
|
47
|
|
48
|
|
49 if __name__ == "__main__":
|
|
50 parser = argparse.ArgumentParser(description="Require specific start codons")
|
|
51 parser.add_argument("fasta", type=argparse.FileType("r"), help="Fasta Genome")
|
|
52 parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
|
|
53 args = parser.parse_args()
|
|
54
|
|
55 for rec in require_shinefind(**vars(args)):
|
|
56 gffWrite([rec], sys.stdout)
|