comparison mutation_analysis.py @ 21:c9f9623f1f76 draft

Uploaded
author davidvanzessen
date Thu, 02 Apr 2015 03:31:23 -0400
parents 069419cccba4
children d84c9791d8c4
comparison
equal deleted inserted replaced
20:850857bc8605 21:c9f9623f1f76
70 ataIndex = 0 70 ataIndex = 0
71 tatIndex = 0 71 tatIndex = 0
72 aggctatIndex = 0 72 aggctatIndex = 0
73 atagcctIndex = 0 73 atagcctIndex = 0
74 first = True 74 first = True
75 IDlist = []
75 with open(infile, 'r') as i: 76 with open(infile, 'r') as i:
76 for line in i: 77 for line in i:
77 if first: 78 if first:
78 linesplt = line.split("\t") 79 linesplt = line.split("\t")
79 ataIndex = linesplt.index("X.a.t.a") 80 ataIndex = linesplt.index("X.a.t.a")
91 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] 92 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]]
92 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 93 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
93 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 94 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
94 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 95 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
95 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) 96 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])])
97 IDlist += [ID]
96 98
97 99
98 directory = outfile[:outfile.rfind("/") + 1] 100 directory = outfile[:outfile.rfind("/") + 1]
99 value = 0; 101 value = 0;
100 valuedic = dict() 102 valuedic = dict()
122 #for total 124 #for total
123 x = sum([y for x,y in curr.iteritems()]) 125 x = sum([y for x,y in curr.iteritems()])
124 y = valuedic["total"] 126 y = valuedic["total"]
125 z = str(round(x / float(valuedic["total"]) * 100, 1)) 127 z = str(round(x / float(valuedic["total"]) * 100, 1))
126 o.write("," + str(x) + "," + str(y) + "," + z + "\n") 128 o.write("," + str(x) + "," + str(y) + "," + z + "\n")
129
130
131 #for testing
132 seq_motif_file = outfile[:outfile.rindex("/")] + "/motif_per_seq.txt"
133 first = True
134 with open(seq_motif_file, 'w') as o:
135 for ID in IDlist:
136 if first:
137 o.write("ID\tRGYWC\tWRCY\tWA\tTW\n")
138 first = False
139 continue
140 print ID
141 print RGYWCount[ID]
142 print WRCYCount[ID]
143 print WACount[ID]
144 print TWCount[ID]
145 o.write(ID + "\t" + str(RGYWCount[ID]) + "\t" + str(WRCYCount[ID]) + "\t" + str(WACount[ID]) + "\t" + str(TWCount[ID]) + "\n")