Mercurial > repos > davidvanzessen > mutation_analysis
comparison mutation_analysis.py @ 26:2433a1e110e1 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Wed, 08 Apr 2015 05:25:52 -0400 |
| parents | d84c9791d8c4 |
| children | 362ef99f9405 |
comparison
equal
deleted
inserted
replaced
| 25:58a62d2c0377 | 26:2433a1e110e1 |
|---|---|
| 24 cdr1Index = 0 | 24 cdr1Index = 0 |
| 25 fr2Index = 0 | 25 fr2Index = 0 |
| 26 cdr2Index = 0 | 26 cdr2Index = 0 |
| 27 fr3Index = 0 | 27 fr3Index = 0 |
| 28 first=True | 28 first=True |
| 29 IDlist = [] | |
| 30 mutationList = [] | |
| 31 | |
| 29 with open(infile, 'r') as i: | 32 with open(infile, 'r') as i: |
| 30 for line in i: | 33 for line in i: |
| 31 if first: | 34 if first: |
| 32 linesplt = line.split("\t") | 35 linesplt = line.split("\t") |
| 33 IDIndex = linesplt.index("Sequence.ID") | 36 IDIndex = linesplt.index("Sequence.ID") |
| 47 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] | 50 mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] |
| 48 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] | 51 mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] |
| 49 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] | 52 mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] |
| 50 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] | 53 mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] |
| 51 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] | 54 mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] |
| 55 | |
| 56 mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"] | |
| 57 | |
| 58 IDlist += [ID] | |
| 59 | |
| 60 | |
| 61 AA_mutation = [0] * (int(max(mutationList, key=lambda i:int(i[4]) if i[4] else 0)[4]) + 1) | |
| 62 | |
| 63 for mutation in mutationList: | |
| 64 if mutation[4]: #if non silent mutation | |
| 65 AA_mutation[int(mutation[4])] += 1 | |
| 66 | |
| 67 print AA_mutation | |
| 68 | |
| 69 aa_mutations_file = outfile[:outfile.rindex("/")] + "/aa_mutations.txt" | |
| 70 with open(aa_mutations_file, 'w') as o: | |
| 71 o.write(",".join([str(x) for x in AA_mutation]) + "\n") | |
| 52 | 72 |
| 53 if linecount == 0: | 73 if linecount == 0: |
| 54 print "No data, exiting" | 74 print "No data, exiting" |
| 55 with open(outfile, 'w') as o: | 75 with open(outfile, 'w') as o: |
| 56 o.write("RGYW (%)," + ("0,0,0\n" * len(genes))) | 76 o.write("RGYW (%)," + ("0,0,0\n" * len(genes))) |
| 70 ataIndex = 0 | 90 ataIndex = 0 |
| 71 tatIndex = 0 | 91 tatIndex = 0 |
| 72 aggctatIndex = 0 | 92 aggctatIndex = 0 |
| 73 atagcctIndex = 0 | 93 atagcctIndex = 0 |
| 74 first = True | 94 first = True |
| 75 IDlist = [] | |
| 76 with open(infile, 'r') as i: | 95 with open(infile, 'r') as i: |
| 77 for line in i: | 96 for line in i: |
| 78 if first: | 97 if first: |
| 79 linesplt = line.split("\t") | 98 linesplt = line.split("\t") |
| 80 ataIndex = linesplt.index("X.a.t.a") | 99 ataIndex = linesplt.index("X.a.t.a") |
| 92 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] | 111 TW = [(int(x),int(y),z) for (x,y,z) in [hotspotMatcher.match(x).groups() for x in linesplt[tatIndex].split("|") if x]] |
| 93 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 112 RGYWCount[ID] = sum([1 for (x,y,z) in RGYW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| 94 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 113 WRCYCount[ID] = sum([1 for (x,y,z) in WRCY if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| 95 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 114 WACount[ID] = sum([1 for (x,y,z) in WA if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| 96 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) | 115 TWCount[ID] = sum([1 for (x,y,z) in TW if z and z != "CDR3" and any([(x <= int(where) <= y) for (frm, where, to, a,b,c,d) in mutationdic[ID + "_" + z]])]) |
| 97 IDlist += [ID] | 116 |
| 98 | 117 |
| 99 | 118 |
| 100 directory = outfile[:outfile.rfind("/") + 1] | 119 directory = outfile[:outfile.rfind("/") + 1] |
| 101 value = 0 | 120 value = 0 |
| 102 valuedic = dict() | 121 valuedic = dict() |
