0
|
1 #!/usr/bin/env python
|
|
2 import sys
|
|
3 import logging
|
|
4 import argparse
|
|
5 from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
|
|
6 #from Bio.SeqFeature import SeqFeature
|
|
7 from gff3 import feature_lambda, feature_test_type
|
|
8
|
|
# Configure the root logger at INFO level, then create a logger named
# after this module.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)
|
|
11
|
|
12
|
|
def fixed_feature(rec):
    """Yield a new gene > mRNA > CDS hierarchy for every CDS found in ``rec``.

    Every feature returned by ``feature_lambda`` for the ``CDS`` type test
    (called with ``subfeatures=True``) is wrapped in a newly created ``mRNA``
    feature, which is itself wrapped in a newly created ``gene`` feature.
    Both wrappers reuse the CDS's location object and carry the source
    qualifier ``cpt.fixModel``.  The CDS is modified in place: its ``ID`` and
    ``Parent`` qualifiers are overwritten.

    Args:
        rec: A record whose ``features`` attribute is searched for CDSs.

    Yields:
        The newly created ``gene`` feature for each CDS, with the mRNA as its
        only sub-feature and the CDS as the mRNA's only sub-feature.

    Raises:
        KeyError: While iterating, if a CDS has no ``ID`` qualifier.
    """
    # Imported once per call instead of on every loop iteration; kept local
    # so the module-level imports do not need to change.
    import random

    for cds in feature_lambda(
        rec.features, feature_test_type, {"type": "CDS"}, subfeatures=True
    ):
        # NOTE(review): the random suffix presumably keeps generated IDs
        # distinct when the input contains repeated CDS IDs -- confirm.
        fid = cds.qualifiers["ID"][0] + "_" + str(random.random())

        gene = gffSeqFeature(
            location=cds.location,
            type="gene",
            qualifiers={"ID": [fid], "source": ["cpt.fixModel"]},
        )
        # Below that we have an mRNA
        mRNA = gffSeqFeature(
            location=cds.location,
            type="mRNA",
            qualifiers={
                "source": ["cpt.fixModel"],
                "ID": ["%s.mRNA" % fid],
                # Copy the list so the gene's ID and the mRNA's Parent are
                # not the very same list object.
                "Parent": list(gene.qualifiers["ID"]),
            },
        )
        cds.qualifiers["ID"] = [fid + ".CDS"]
        # Copied for the same reason: avoid aliasing the mRNA's ID list.
        cds.qualifiers["Parent"] = list(mRNA.qualifiers["ID"])

        mRNA.sub_features = [cds]
        gene.sub_features = [mRNA]
        yield gene
|
|
37
|
|
38
|
|
def gff_filter(gff3):
    """Replace each record's features with gene models and write it to stdout.

    For every record produced by ``gffParse(gff3)``, the feature list is
    replaced by the genes yielded from ``fixed_feature``, ordered by
    ``location.start``; the record's annotations are reset to an empty dict
    and the record is passed to ``gffWrite`` with ``sys.stdout`` as target.

    Args:
        gff3: The GFF3 input handed straight to ``gffParse``.
    """
    for record in gffParse(gff3):
        gene_models = list(fixed_feature(record))
        # list.sort is stable, just like sorted(), so ties keep their order.
        gene_models.sort(key=lambda gene: gene.location.start)

        record.features = gene_models
        record.annotations = {}
        gffWrite([record], sys.stdout)
|
|
44
|
|
45
|
|
if __name__ == "__main__":
    # Command-line entry point: one positional argument, the GFF3 file,
    # which argparse opens for reading before it is handed to gff_filter.
    parser = argparse.ArgumentParser(
        description="add parent gene features to CDSs",
    )
    parser.add_argument(
        "gff3",
        type=argparse.FileType("r"),
        help="GFF3 annotations",
    )
    args = parser.parse_args()
    gff_filter(gff3=args.gff3)
|