diff gff3_add_parents_to_cds.py @ 0:d5c3354c166d draft default tip

Uploaded
author cpt_testbed
date Fri, 29 Apr 2022 10:33:36 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gff3_add_parents_to_cds.py	Fri Apr 29 10:33:36 2022 +0000
@@ -0,0 +1,50 @@
+#!/usr/bin/env python
+import sys
+import logging
+import argparse
+from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature
+#from Bio.SeqFeature import SeqFeature
+from gff3 import feature_lambda, feature_test_type
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+
+def fixed_feature(rec):
+    for feature in feature_lambda(
+        rec.features, feature_test_type, {"type": "CDS"}, subfeatures=True
+    ):
+        import random
+
+        fid = feature.qualifiers["ID"][0] + "_" + str(random.random())
+        gene = gffSeqFeature(
+            location=feature.location,
+            type="gene",
+            qualifiers={"ID": [fid], "source": ["cpt.fixModel"]},
+        )
+        # Below that we have an mRNA
+        mRNA = gffSeqFeature(
+            location=feature.location,
+            type="mRNA",
+            qualifiers={"source": ["cpt.fixModel"], "ID": ["%s.mRNA" % fid], "Parent": gene.qualifiers["ID"]},
+        )
+        feature.qualifiers["ID"] = [fid + ".CDS"]
+        feature.qualifiers["Parent"] = mRNA.qualifiers["ID"]
+
+        mRNA.sub_features = [feature]
+        gene.sub_features = [mRNA]
+        yield gene
+
+
+def gff_filter(gff3):
+    for rec in gffParse(gff3):
+        rec.features = sorted(list(fixed_feature(rec)), key=lambda x: x.location.start)
+        rec.annotations = {}
+        gffWrite([rec], sys.stdout)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="add parent gene features to CDSs")
+    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
+    args = parser.parse_args()
+    gff_filter(**vars(args))