Mercurial > repos > cpt_testbed > functionalworkflow
comparison lipoP_to_gff3.py @ 0:f678e282b320 draft default tip
"planemo upload"
| author | cpt_testbed |
|---|---|
| date | Fri, 06 May 2022 07:07:23 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f678e282b320 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 import sys | |
| 3 import copy | |
| 4 import argparse | |
| 5 from CPT_GFFParser import gffParse, gffWrite, gffSeqFeature | |
| 6 from Bio.Seq import Seq | |
| 7 from Bio.SeqRecord import SeqRecord | |
| 8 from Bio.SeqFeature import FeatureLocation | |
| 9 from gff3 import feature_lambda, feature_test_type, get_id | |
| 10 | |
| 11 | |
| 12 def lipoP_gff(lipoIn, gff3In, jBrowseOut, filterSP2): | |
| 13 | |
| 14 orgIDs = {} | |
| 15 orgID = "" | |
| 16 | |
| 17 # Take and parse the txt output into a sequence of records | |
| 18 # Dict of X records, with the ID as key and an array Y of each cleavage site as the value, | |
| 19 for row in lipoIn: | |
| 20 if row.startswith("#"): | |
| 21 orgID = "" | |
| 22 continue | |
| 23 | |
| 24 rowElem = row.split("\t") | |
| 25 | |
| 26 orgID = rowElem[0] | |
| 27 | |
| 28 if filterSP2: | |
| 29 if rowElem[2] == "CleavII": | |
| 30 if not (orgID in orgIDs.keys()): | |
| 31 orgIDs[orgID] = [] | |
| 32 orgIDs[orgID].append(int(rowElem[3])) # , int(rowElem[4]))) | |
| 33 else: | |
| 34 if rowElem[2] in "CleavII": | |
| 35 if not (orgID in orgIDs.keys()): | |
| 36 orgIDs[orgID] = [] | |
| 37 orgIDs[orgID].append(int(rowElem[3])) # , int(rowElem[4]))) | |
| 38 | |
| 39 | |
| 40 # Rebase | |
| 41 for gff in gffParse(gff3In): | |
| 42 keepSeq = [] | |
| 43 for xRec in gff.features: | |
| 44 cdss = list( | |
| 45 feature_lambda( | |
| 46 xRec.sub_features, | |
| 47 feature_test_type, | |
| 48 {"type": "CDS"}, | |
| 49 subfeatures=False, | |
| 50 ) | |
| 51 ) | |
| 52 findCleave = "" | |
| 53 cdsOff = 0 | |
| 54 for cds in cdss: | |
| 55 if cds.id in orgIDs: | |
| 56 findCleave = cds.id | |
| 57 break | |
| 58 cdsOff += 1 | |
| 59 if findCleave == "": | |
| 60 if not jBrowseOut: | |
| 61 keepSeq.append(xRec) | |
| 62 continue | |
| 63 | |
| 64 #if jBrowseOut: | |
| 65 # xRec.sub_features = [] | |
| 66 | |
| 67 i = 0 | |
| 68 for cleaveBase in orgIDs[findCleave]: | |
| 69 tempQuals = xRec.qualifiers.copy() | |
| 70 i += 1 | |
| 71 tempQuals["ID"] = xRec.id + "_cleavage_" + str(i) | |
| 72 | |
| 73 xRec.sub_features.append( | |
| 74 gffSeqFeature( | |
| 75 FeatureLocation( | |
| 76 cdss[cdsOff].location.start + (cleaveBase * 3) - 1, | |
| 77 cdss[cdsOff].location.start + (cleaveBase * 3) + 1, | |
| 78 ), | |
| 79 type="cleavage_site", | |
| 80 strand=xRec.location.strand, | |
| 81 qualifiers=tempQuals, | |
| 82 ) | |
| 83 ) | |
| 84 keepSeq.append(xRec) | |
| 85 | |
| 86 gff.features = keepSeq | |
| 87 gffWrite([gff], sys.stdout) | |
| 88 | |
| 89 | |
| 90 if __name__ == "__main__": | |
| 91 parser = argparse.ArgumentParser(description="add parent gene features to CDSs") | |
| 92 parser.add_argument( | |
| 93 "lipoIn", type=argparse.FileType("r"), help="LipoP tool's .txt output" | |
| 94 ) | |
| 95 parser.add_argument( | |
| 96 "gff3In", type=argparse.FileType("r"), help="GFF3 to rebase LipoP results" | |
| 97 ) | |
| 98 parser.add_argument( | |
| 99 "--jBrowseOut", | |
| 100 type=bool, | |
| 101 default=False, | |
| 102 help="Prepare Output for jBrowse instance", | |
| 103 ) | |
| 104 parser.add_argument( | |
| 105 "--filterSP2", | |
| 106 action='store_true', | |
| 107 help="Filter for only SPII sites", | |
| 108 ) | |
| 109 args = parser.parse_args() | |
| 110 lipoP_gff(**vars(args)) |
