Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.py @ 21:c9f9623f1f76 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 02 Apr 2015 03:31:23 -0400 |
parents | 069419cccba4 |
children | d84c9791d8c4 |
comparison
equal
deleted
inserted
replaced
20:850857bc8605 | 21:c9f9623f1f76 |
---|---|
70 ataIndex = 0 | 70 ataIndex = 0 |
71 tatIndex = 0 | 71 tatIndex = 0 |
72 aggctatIndex = 0 | 72 aggctatIndex = 0 |
73 atagcctIndex = 0 | 73 atagcctIndex = 0 |
74 first = True | 74 first = True |
75 IDlist = [] | |
75 with open(infile, 'r') as i: | 76 with open(infile, 'r') as i: |
76 for line in i: | 77 for line in i: |
77 if first: | 78 if first: |
78 linesplt = line.split("\t") | 79 linesplt = line.split("\t") |
79 ataIndex = linesplt.index("X.a.t.a") | 80 ataIndex = linesplt.index("X.a.t.a") |
91 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] | 92 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] |
92 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 93 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
93 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 94 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
94 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 95 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
95 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 96 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
97 IDlist += [ID] | |
96 | 98 |
97 | 99 |
98 directory = outfile[:outfile.rfind("/") + 1] | 100 directory = outfile[:outfile.rfind("/") + 1] |
99 value = 0; | 101 value = 0; |
100 valuedic = dict() | 102 valuedic = dict() |
122 #for total | 124 #for total |
123 x = sum([y for x,y in curr.iteritems()]) | 125 x = sum([y for x,y in curr.iteritems()]) |
124 y = valuedic["total"] | 126 y = valuedic["total"] |
125 z = str(round(x / float(valuedic["total"]) * 100, 1)) | 127 z = str(round(x / float(valuedic["total"]) * 100, 1)) |
126 o.write("," + str(x) + "," + str(y) + "," + z + "\n") | 128 o.write("," + str(x) + "," + str(y) + "," + z + "\n") |
129 | |
130 | |
131 #for testing | |
132 seq_motif_file = outfile[:outfile.rindex("/")] + "/motif_per_seq.txt" | |
133 first = True | |
134 with open(seq_motif_file, 'w') as o: | |
135 for ID in IDlist: | |
136 if first: | |
137 o.write("ID\tRGYWC\tWRCY\tWA\tTW\n") | |
138 first = False | |
139 continue | |
140 print ID | |
141 print RGYWCount[ID] | |
142 print WRCYCount[ID] | |
143 print WACount[ID] | |
144 print TWCount[ID] | |
145 o.write(ID + "\t" + str(RGYWCount[ID]) + "\t" + str(WRCYCount[ID]) + "\t" + str(WACount[ID]) + "\t" + str(TWCount[ID]) + "\n") |