Mercurial > repos > davidvanzessen > imgt_loader_igg
comparison imgtconvert.py @ 5:387fce4a1dd4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 07 Jul 2014 09:48:59 -0400 |
parents | 021d39f6bb0e |
children | 5b030e48b308 |
comparison
equal
deleted
inserted
replaced
4:021d39f6bb0e | 5:387fce4a1dd4 |
---|---|
45 for i in range(0, step): | 45 for i in range(0, step): |
46 triplets.append((files[i], files[i + step], files[i + step + step])) | 46 triplets.append((files[i], files[i + step], files[i + step + step])) |
47 | 47 |
48 outFile = args.output | 48 outFile = args.output |
49 | 49 |
50 fSummary = pd.read_csv(triplets[0][0], sep="\t") | 50 fSummary = pd.read_csv(triplets[0][0], sep="\t", low_memory=False) |
51 fSequence = pd.read_csv(triplets[0][1], sep="\t") | 51 fSequence = pd.read_csv(triplets[0][1], sep="\t", low_memory=False) |
52 fJunction = pd.read_csv(triplets[0][2], sep="\t") | 52 fJunction = pd.read_csv(triplets[0][2], sep="\t", low_memory=False) |
53 tmp = fSummary[["Sequence ID", "JUNCTION frame", "V-GENE and allele", "D-GENE and allele", "J-GENE and allele"]] | 53 tmp = fSummary[["Sequence ID", "JUNCTION frame", "V-GENE and allele", "D-GENE and allele", "J-GENE and allele"]] |
54 | 54 |
55 tmp["CDR1 Seq"] = fSequence["CDR1-IMGT"] | 55 tmp["CDR1 Seq"] = fSequence["CDR1-IMGT"] |
56 tmp["CDR1 Length"] = fSummary["CDR1-IMGT length"] | 56 tmp["CDR1 Length"] = fSummary["CDR1-IMGT length"] |
57 | 57 |
184 tmp = tmp.replace("null", "Out-of-frame") | 184 tmp = tmp.replace("null", "Out-of-frame") |
185 tmp = tmp.replace("out-of-frame", "Out-of-frame") | 185 tmp = tmp.replace("out-of-frame", "Out-of-frame") |
186 outFrame["VDJ Frame"] = tmp | 186 outFrame["VDJ Frame"] = tmp |
187 outFrame["CDR3 Length DNA"] = outFrame["CDR3 Seq DNA"].map(str).map(len) | 187 outFrame["CDR3 Length DNA"] = outFrame["CDR3 Seq DNA"].map(str).map(len) |
188 safeLength = lambda x: len(x) if type(x) == str else 0 | 188 safeLength = lambda x: len(x) if type(x) == str else 0 |
189 outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top D Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows? | 189 outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows? |
 | 190 #outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top D Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows? |
190 outFrame.to_csv(outFile, sep="\t", index=False, index_label="index") | 191 outFrame.to_csv(outFile, sep="\t", index=False, index_label="index") |