Mercurial > repos > cpt_testbed > cpt_fixsixpack
comparison gff3_fix_sixpack.py @ 0:7de6b7dab158 draft default tip
Uploaded
| author | cpt_testbed |
|---|---|
| date | Fri, 29 Apr 2022 11:34:15 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:7de6b7dab158 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import sys | |
| 3 import logging | |
| 4 import argparse | |
| 5 from CPT_GFFParser import gffParse, gffWrite | |
| 6 from Bio.SeqFeature import SeqFeature | |
| 7 from gff3 import feature_lambda, feature_test_type | |
| 8 | |
# Configure the root logger so INFO-level (and more severe) messages are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
| 11 | |
| 12 | |
def fixed_feature(rec):
    """Yield the gene features of ``rec`` with mRNA sub-features filtered out.

    For each feature that ``feature_lambda`` selects as type ``gene``, the
    gene's ``sub_features`` list is replaced by the features that
    ``feature_lambda`` returns for the gene's sub-features when the ``mRNA``
    type test is inverted. Each kept feature has its ``Parent`` qualifier set
    to the gene's ``ID`` qualifier.

    The gene objects are modified in place and then yielded.

    Args:
        rec: A record exposing a ``features`` list (as produced by
            ``gffParse`` in this script).

    Yields:
        Each gene feature, after its ``sub_features`` have been replaced.
    """
    genes = feature_lambda(
        rec.features, feature_test_type, {"type": "gene"}, subfeatures=True
    )
    for gene in genes:
        # Select everything under the gene that is *not* an mRNA.
        non_mrna = feature_lambda(
            gene.sub_features,
            feature_test_type,
            {"type": "mRNA"},
            subfeatures=True,
            invert=True,
        )
        kept = []
        for child in non_mrna:
            # Re-parent the surviving feature directly onto the gene.
            child.qualifiers["Parent"] = gene.qualifiers["ID"]
            kept.append(child)
        # Swap in the filtered list before handing the gene back.
        gene.sub_features = kept
        yield gene
| 33 | |
| 34 | |
def gff_filter(gff3):
    """Rewrite each record parsed from ``gff3`` and write it to standard output.

    For every record yielded by ``gffParse``: the record's features are
    replaced by the output of ``fixed_feature`` ordered by
    ``location.start``, the record's annotations are reset to an empty dict,
    and the record is written to ``sys.stdout`` with ``gffWrite``.

    Args:
        gff3: The GFF3 input handed to ``gffParse`` (an open text file when
            invoked from the command line).
    """
    for record in gffParse(gff3):
        genes = list(fixed_feature(record))
        # Stable in-place sort by start coordinate.
        genes.sort(key=lambda feat: feat.location.start)
        record.features = genes
        record.annotations = {}
        gffWrite([record], sys.stdout)
| 40 | |
| 41 | |
if __name__ == "__main__":
    # Command-line entry point: takes one positional GFF3 file (opened for
    # reading by argparse) and passes it to gff_filter.
    cli = argparse.ArgumentParser(description="Fix gene model from naive ORF caller")
    cli.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 annotations")
    options = cli.parse_args()
    gff_filter(gff3=options.gff3)
