Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.py @ 28:362ef99f9405 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Wed, 08 Apr 2015 10:14:46 -0400 |
| parents | 2433a1e110e1 |
| children | 7e44617c9ca4 |
comparison
equal
deleted
inserted
replaced
| 27:c9c95b96b7cc | 28:362ef99f9405 |
|---|---|
| 51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] | 51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] |
| 52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] | 52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] |
| 53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] | 53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] |
| 54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] | 54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] |
| 55 | 55 |
| | 56 print mutationdic[ID + "_FR1"] |
| | 57 |
| 56 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | 58 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] |
| 57 | 59 |
| 58 IDlist += [ID] | 60 IDlist += [ID] |
| 59 | 61 |
| 60 | 62 |
| 61 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) | 63 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) #[4] is the position of the AA mutation, None if silent |
| 62 | 64 |
| 63 for mutation in mutationList: | 65 for mutation in mutationList: |
| 64 if mutation[4]: #if non silent mutation | 66 if mutation[4]: #if non silent mutation |
| 65 AA_mutation[int(mutation[4])] += 1 | 67 AA_mutation[int(mutation[4])] += 1 |
| 66 | 68 |
| 107 ID = linesplt[IDIndex] | 109 ID = linesplt[IDIndex] |
| 108 RGYW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] | 110 RGYW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[aggctatIndex].split("|") if x]] |
| 109 WRCY = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] | 111 WRCY = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[atagcctIndex].split("|") if x]] |
| 110 WA = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]] | 112 WA = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[ataIndex].split("|") if x]] |
| 111 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] | 113 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] |
| 112 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 114 RGYWCount[ID], WRCYCount[ID], WACount[ID], TWCount[ID] = 0,0,0,0 |
| 113 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 115 for (x,y,z) in RGYW: #RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| 114 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 116 if not z or z == "CDR3": |
| 115 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 117 continue |
| | 118 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| | 119 if in_mutations > 0: |
| | 120 RGYWCount[ID] += 1.0 / in_mutations |
| | 121 |
| | 122 for (x,y,z) in WRCY: #WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| | 123 if not z or z == "CDR3": |
| | 124 continue |
| | 125 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| | 126 if in_mutations > 0: |
| | 127 WRCYCount[ID] += 1.0 / in_mutations |
| | 128 |
| | 129 for (x,y,z) in WA: #WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| | 130 if not z or z == "CDR3": |
| | 131 continue |
| | 132 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| | 133 if in_mutations > 0: |
| | 134 WACount[ID] += 1.0 / in_mutations |
| | 135 |
| | 136 for (x,y,z) in TW: #TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| | 137 if not z or z == "CDR3": |
| | 138 continue |
| | 139 in_mutations = sum([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]]) |
| | 140 if in_mutations > 0: |
| | 141 TWCount[ID] += 1.0 / in_mutations |
| 116 | 142 |
| 117 | 143 |
| 118 | 144 |
| 119 directory = outfile[:outfile.rfind("/") + 1] | 145 directory = outfile[:outfile.rfind("/") + 1] |
| 120 value = 0 | 146 value = 0 |
| 134 for gene in genes: | 160 for gene in genes: |
| 135 geneMatcher = re.compile(".*" + gene + ".*") | 161 geneMatcher = re.compile(".*" + gene + ".*") |
| 136 if valuedic[gene] is 0: | 162 if valuedic[gene] is 0: |
| 137 o.write(",0,0,0") | 163 o.write(",0,0,0") |
| 138 else: | 164 else: |
| 139 x = sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]]) | 165 x = int(round(sum([curr[x] for x in [y for y,z in genedic.iteritems() if geneMatcher.match(z)]]))) |
| 140 y = valuedic[gene] | 166 y = valuedic[gene] |
| 141 z = str(round(x / float(valuedic[gene]) * 100, 1)) | 167 z = str(round(x / float(valuedic[gene]) * 100, 1)) |
| 142 o.write("," + str(x) + "," + str(y) + "," + z) | 168 o.write("," + str(x) + "," + str(y) + "," + z) |
| 143 #for total | 169 #for total |
| 144 x = sum([y for x,y in curr.iteritems()]) | 170 x = int(round(sum([y for x,y in curr.iteritems()]))) |
| 145 y = valuedic["total"] | 171 y = valuedic["total"] |
| 146 z = str(round(x / float(valuedic["total"]) * 100, 1)) | 172 z = str(round(x / float(valuedic["total"]) * 100, 1)) |
| 147 o.write("," + str(x) + "," + str(y) + "," + z + "\n") | 173 o.write("," + str(x) + "," + str(y) + "," + z + "\n") |
| 148 | 174 |
| 149 | 175 |
