Mercurial > repos > cpt_testbed > functionalworkflow
diff lipoP_to_gff3.py @ 0:f678e282b320 draft default tip
"planemo upload"
author | cpt_testbed |
---|---|
date | Fri, 06 May 2022 07:07:23 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lipoP_to_gff3.py Fri May 06 07:07:23 2022 +0000 @@ -0,0 +1,110 @@ +#!/usr/bin/env python +import sys +import copy +import argparse +from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +from Bio.SeqFeature import FeatureLocation +from gff3 import feature_lambda, feature_test_type, get_id + + +def lipoP_gff(lipoIn, gff3In, jBrowseOut, filterSP2): + + orgIDs = {} + orgID = "" + + # Take and parse the txt output into a sequence of records + # Dict of X records, with the ID as key and an array Y of each cleavage site as the value, + for row in lipoIn: + if row.startswith("#"): + orgID = "" + continue + + rowElem = row.split("\t") + + orgID = rowElem[0] + + if filterSP2: + if rowElem[2] == "CleavII": + if not (orgID in orgIDs.keys()): + orgIDs[orgID] = [] + orgIDs[orgID].append(int(rowElem[3])) # , int(rowElem[4]))) + else: + if rowElem[2] in "CleavII": + if not (orgID in orgIDs.keys()): + orgIDs[orgID] = [] + orgIDs[orgID].append(int(rowElem[3])) # , int(rowElem[4]))) + + + # Rebase + for gff in gffParse(gff3In): + keepSeq = [] + for xRec in gff.features: + cdss = list( + feature_lambda( + xRec.sub_features, + feature_test_type, + {"type": "CDS"}, + subfeatures=False, + ) + ) + findCleave = "" + cdsOff = 0 + for cds in cdss: + if cds.id in orgIDs: + findCleave = cds.id + break + cdsOff += 1 + if findCleave == "": + if not jBrowseOut: + keepSeq.append(xRec) + continue + + #if jBrowseOut: + # xRec.sub_features = [] + + i = 0 + for cleaveBase in orgIDs[findCleave]: + tempQuals = xRec.qualifiers.copy() + i += 1 + tempQuals["ID"] = xRec.id + "_cleavage_" + str(i) + + xRec.sub_features.append( + gffSeqFeature( + FeatureLocation( + cdss[cdsOff].location.start + (cleaveBase * 3) - 1, + cdss[cdsOff].location.start + (cleaveBase * 3) + 1, + ), + type="cleavage_site", + strand=xRec.location.strand, + qualifiers=tempQuals, + ) + ) + keepSeq.append(xRec) + + gff.features = keepSeq + gffWrite([gff], sys.stdout) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="add parent gene features to CDSs") + parser.add_argument( + "lipoIn", type=argparse.FileType("r"), help="LipoP tool's .txt output" + ) + parser.add_argument( + "gff3In", type=argparse.FileType("r"), help="GFF3 to rebase LipoP results" + ) + parser.add_argument( + "--jBrowseOut", + type=bool, + default=False, + help="Prepare Output for jBrowse instance", + ) + parser.add_argument( + "--filterSP2", + action='store_true', + help="Filter for only SPII sites", + ) + args = parser.parse_args() + lipoP_gff(**vars(args))