comparison imgtconvert.py @ 5:387fce4a1dd4 draft

Uploaded
author davidvanzessen
date Mon, 07 Jul 2014 09:48:59 -0400
parents 021d39f6bb0e
children 5b030e48b308
comparison
equal deleted inserted replaced
4:021d39f6bb0e 5:387fce4a1dd4
45 for i in range(0, step): 45 for i in range(0, step):
46 triplets.append((files[i], files[i + step], files[i + step + step])) 46 triplets.append((files[i], files[i + step], files[i + step + step]))
47 47
48 outFile = args.output 48 outFile = args.output
49 49
50 fSummary = pd.read_csv(triplets[0][0], sep="\t") 50 fSummary = pd.read_csv(triplets[0][0], sep="\t", low_memory=False)
51 fSequence = pd.read_csv(triplets[0][1], sep="\t") 51 fSequence = pd.read_csv(triplets[0][1], sep="\t", low_memory=False)
52 fJunction = pd.read_csv(triplets[0][2], sep="\t") 52 fJunction = pd.read_csv(triplets[0][2], sep="\t", low_memory=False)
53 tmp = fSummary[["Sequence ID", "JUNCTION frame", "V-GENE and allele", "D-GENE and allele", "J-GENE and allele"]] 53 tmp = fSummary[["Sequence ID", "JUNCTION frame", "V-GENE and allele", "D-GENE and allele", "J-GENE and allele"]]
54 54
55 tmp["CDR1 Seq"] = fSequence["CDR1-IMGT"] 55 tmp["CDR1 Seq"] = fSequence["CDR1-IMGT"]
56 tmp["CDR1 Length"] = fSummary["CDR1-IMGT length"] 56 tmp["CDR1 Length"] = fSummary["CDR1-IMGT length"]
57 57
184 tmp = tmp.replace("null", "Out-of-frame") 184 tmp = tmp.replace("null", "Out-of-frame")
185 tmp = tmp.replace("out-of-frame", "Out-of-frame") 185 tmp = tmp.replace("out-of-frame", "Out-of-frame")
186 outFrame["VDJ Frame"] = tmp 186 outFrame["VDJ Frame"] = tmp
187 outFrame["CDR3 Length DNA"] = outFrame["CDR3 Seq DNA"].map(str).map(len) 187 outFrame["CDR3 Length DNA"] = outFrame["CDR3 Seq DNA"].map(str).map(len)
188 safeLength = lambda x: len(x) if type(x) == str else 0 188 safeLength = lambda x: len(x) if type(x) == str else 0
189 outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top D Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows? 189 outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows?
190 #outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top D Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows?
190 outFrame.to_csv(outFile, sep="\t", index=False, index_label="index") 191 outFrame.to_csv(outFile, sep="\t", index=False, index_label="index")