diff mutation_analysis.py @ 110:ade5cf6fd2dc draft

Uploaded
author davidvanzessen
date Tue, 02 Aug 2016 08:30:23 -0400
parents 6add3e66f4fa
children
line wrap: on
line diff
--- a/mutation_analysis.py	Thu Jul 14 07:29:56 2016 -0400
+++ b/mutation_analysis.py	Tue Aug 02 08:30:23 2016 -0400
@@ -22,6 +22,7 @@
 
 mutationdic = dict()
 mutationMatcher = re.compile("^(.)(\d+).(.),?(.)?(\d+)?.?(.)?(.?.?.?.?.?)?")
+NAMatchResult = (None, None, None, None, None, None, '')
 linecount = 0
 
 IDIndex = 0
@@ -58,15 +59,19 @@
 		ID = linesplt[IDIndex]
 		genedic[ID] = linesplt[best_matchIndex]
 		try:
-			mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else []
-			mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x]
-			mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x]
-			mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x]
+			if linesplt[fr1Index] != "NA":
+				mutationdic[ID + "_FR1"] = [mutationMatcher.match(x).groups() for x in linesplt[fr1Index].split("|") if x] if include_fr1 else []
+			else:
+				mutationdic[ID + "_FR1"] = []
+			mutationdic[ID + "_CDR1"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr1Index].split("|") if x] if linesplt[cdr1Index] != "NA" else []
+			mutationdic[ID + "_FR2"] = [mutationMatcher.match(x).groups() for x in linesplt[fr2Index].split("|") if x] if linesplt[fr2Index] != "NA" else []
+			mutationdic[ID + "_CDR2"] = [mutationMatcher.match(x).groups() for x in linesplt[cdr2Index].split("|") if x] if linesplt[cdr2Index] != "NA" else []
 			mutationdic[ID + "_FR2-CDR2"] = mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"]
-			mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x]
-		except:
+			mutationdic[ID + "_FR3"] = [mutationMatcher.match(x).groups() for x in linesplt[fr3Index].split("|") if x] if linesplt[fr3Index] != "NA" else []
+		except e:
 			print linesplt
 			print linecount
+			print e
 		mutationList += mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 		mutationListByID[ID] = mutationdic[ID + "_FR1"] + mutationdic[ID + "_CDR1"] + mutationdic[ID + "_FR2"] + mutationdic[ID + "_CDR2"] + mutationdic[ID + "_FR3"]
 
@@ -79,6 +84,8 @@
 		IDlist += [ID]
 
 AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1)  # [4] is the position of the AA mutation, None if silent
+if AALength < 60:
+	AALength = 64
 
 AA_mutation = [0] * AALength
 AA_mutation_dic = {"ca": AA_mutation[:], "cg": AA_mutation[:], "cm": AA_mutation[:], "un": AA_mutation[:]}