# HG changeset patch # User davidvanzessen # Date 1404740939 14400 # Node ID 387fce4a1dd41a0b87f0283e8fb2cb53c1f17883 # Parent 021d39f6bb0ee6698460bfdd808f86aeb8813a91 Uploaded diff -r 021d39f6bb0e -r 387fce4a1dd4 imgtconvert.py --- a/imgtconvert.py Fri Jun 06 04:34:44 2014 -0400 +++ b/imgtconvert.py Mon Jul 07 09:48:59 2014 -0400 @@ -47,9 +47,9 @@ outFile = args.output -fSummary = pd.read_csv(triplets[0][0], sep="\t") -fSequence = pd.read_csv(triplets[0][1], sep="\t") -fJunction = pd.read_csv(triplets[0][2], sep="\t") +fSummary = pd.read_csv(triplets[0][0], sep="\t", low_memory=False) +fSequence = pd.read_csv(triplets[0][1], sep="\t", low_memory=False) +fJunction = pd.read_csv(triplets[0][2], sep="\t", low_memory=False) tmp = fSummary[["Sequence ID", "JUNCTION frame", "V-GENE and allele", "D-GENE and allele", "J-GENE and allele"]] tmp["CDR1 Seq"] = fSequence["CDR1-IMGT"] @@ -186,5 +186,6 @@ outFrame["VDJ Frame"] = tmp outFrame["CDR3 Length DNA"] = outFrame["CDR3 Seq DNA"].map(str).map(len) safeLength = lambda x: len(x) if type(x) == str else 0 -outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top D Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows? +outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows? +#outFrame = outFrame[(outFrame["CDR3 Seq DNA"].map(safeLength) > 0) & (outFrame["Top V Gene"] != "NA") & (outFrame["Top D Gene"] != "NA") & (outFrame["Top J Gene"] != "NA")] #filter out weird rows? outFrame.to_csv(outFile, sep="\t", index=False, index_label="index") diff -r 021d39f6bb0e -r 387fce4a1dd4 imgtconvert.sh --- a/imgtconvert.sh Fri Jun 06 04:34:44 2014 -0400 +++ b/imgtconvert.sh Mon Jul 07 09:48:59 2014 -0400 @@ -43,7 +43,7 @@ then echo "Zip archive" echo "Trying: unzip $1 -d $PWD/files/" - unzip $1 -d $PWD/files/ + unzip $1 -d $PWD/files/ > $PWD/unziplog.log fi if [[ "$f" == *"$rarType"* ]]