# HG changeset patch # User davidvanzessen # Date 1421078878 18000 # Node ID b8db36cfe6ad05f3d239d666d9dc8e6f3f8f0b3e # Parent 7dbc9ebcefce7fa6f6b59060e3d84786eded5aa9 Uploaded diff -r 7dbc9ebcefce -r b8db36cfe6ad RScript.r --- a/RScript.r Mon Jan 12 05:35:42 2015 -0500 +++ b/RScript.r Mon Jan 12 11:07:58 2015 -0500 @@ -31,6 +31,7 @@ outfile = args[2] #path to output file outdir = args[3] #path to output folder (html/images/data) clonaltype = args[4] #clonaltype definition, or 'none' for no unique filtering +ct = unlist(strsplit(clonaltype, ",")) species = args[5] #human or mouse locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) @@ -42,13 +43,15 @@ setwd(outdir) # remove weird rows -inputdata = inputdata[inputdata$Sample != "",] +inputdata = inputdata[inputdata$Sample != "",] #remove the allele from the V,D and J genes inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) + inputdata$clonaltype = 1:nrow(inputdata) + PRODF = inputdata UNPROD = inputdata if(filterproductive){ @@ -63,10 +66,9 @@ #remove duplicates based on the clonaltype if(clonaltype != "none"){ + clonaltype = paste(clonaltype, ",Sample", sep="") #add sample column to clonaltype, unique within samples PRODF$clonaltype = do.call(paste, c(PRODF[unlist(strsplit(clonaltype, ","))], sep = ":")) PRODF = PRODF[!duplicated(PRODF$clonaltype), ] - UNPROD$clonaltype = do.call(paste, c(UNPROD[unlist(strsplit(clonaltype, ","))], sep = ":")) - UNPROD = UNPROD[!duplicated(UNPROD$clonaltype), ] } PRODF$freq = 1 @@ -96,9 +98,8 @@ inputdata.dt = data.table(inputdata) #for speed -ct = unlist(strsplit(clonaltype, ",")) if(clonaltype == "none"){ - ct = c("ID") + ct = c("clonaltype") } inputdata.dt$samples_replicates = paste(inputdata.dt$Sample, inputdata.dt$Replicate, sep="_") @@ -471,6 +472,7 @@ { clonalityFrame = inputdata if(clonaltype != "none"){ + clonalityFrame$clonaltype = do.call(paste, c(clonalityFrame[unlist(strsplit(clonaltype, ","))], sep = ":")) clonalityFrame$ReplicateConcat = paste(clonalityFrame$clonaltype, clonalityFrame$Sample, clonalityFrame$Replicate, sep = ":") clonalityFrame = clonalityFrame[!duplicated(clonalityFrame$ReplicateConcat), ] } @@ -515,7 +517,7 @@ ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) lapply(ReplicateSplit, FUN=ReplicatePrint) - ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(Reads), ReadsSquaredSum=sum(squared)), by=c("Sample")]) + ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")]) clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T) @@ -558,54 +560,54 @@ if(all(imgtcolumns %in% colnames(inputdata))) { newData = data.frame(data.table(PRODF)[,list(unique=.N, - VH.DEL=mean(X3V.REGION.trimmed.nt.nb, na.rm=T), - P1=mean(P3V.nt.nb, na.rm=T), - N1=mean(N1.REGION.nt.nb, na.rm=T), - P2=mean(P5D.nt.nb, na.rm=T), - DEL.DH=mean(X5D.REGION.trimmed.nt.nb, na.rm=T), - DH.DEL=mean(X3D.REGION.trimmed.nt.nb, na.rm=T), - P3=mean(P3D.nt.nb, na.rm=T), - N2=mean(N2.REGION.nt.nb, na.rm=T), - P4=mean(P5J.nt.nb, na.rm=T), - DEL.JH=mean(X5J.REGION.trimmed.nt.nb, na.rm=T), - Total.Del=( mean(X3V.REGION.trimmed.nt.nb, na.rm=T) + - mean(X5D.REGION.trimmed.nt.nb, na.rm=T) + - mean(X3D.REGION.trimmed.nt.nb, na.rm=T) + - mean(X5J.REGION.trimmed.nt.nb, na.rm=T)), - - Total.N=( mean(N1.REGION.nt.nb, na.rm=T) + - mean(N2.REGION.nt.nb, na.rm=T)), - - Total.P=( mean(P3V.nt.nb, na.rm=T) + - mean(P5D.nt.nb, na.rm=T) + - mean(P3D.nt.nb, na.rm=T) + - mean(P5J.nt.nb, na.rm=T))), - by=c("Sample")]) + VH.DEL=mean(X3V.REGION.trimmed.nt.nb, na.rm=T), + P1=mean(P3V.nt.nb, na.rm=T), + N1=mean(N1.REGION.nt.nb, na.rm=T), + P2=mean(P5D.nt.nb, na.rm=T), + DEL.DH=mean(X5D.REGION.trimmed.nt.nb, na.rm=T), + DH.DEL=mean(X3D.REGION.trimmed.nt.nb, na.rm=T), + P3=mean(P3D.nt.nb, na.rm=T), + N2=mean(N2.REGION.nt.nb, na.rm=T), + P4=mean(P5J.nt.nb, na.rm=T), + DEL.JH=mean(X5J.REGION.trimmed.nt.nb, na.rm=T), + Total.Del=( mean(X3V.REGION.trimmed.nt.nb, na.rm=T) + + mean(X5D.REGION.trimmed.nt.nb, na.rm=T) + + mean(X3D.REGION.trimmed.nt.nb, na.rm=T) + + mean(X5J.REGION.trimmed.nt.nb, na.rm=T)), + + Total.N=( mean(N1.REGION.nt.nb, na.rm=T) + + mean(N2.REGION.nt.nb, na.rm=T)), + + Total.P=( mean(P3V.nt.nb, na.rm=T) + + mean(P5D.nt.nb, na.rm=T) + + mean(P3D.nt.nb, na.rm=T) + + mean(P5J.nt.nb, na.rm=T))), + by=c("Sample")]) write.table(newData, "junctionAnalysisProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) - newData = data.frame(data.table(UNPROD)[,list(unique=.N, - VH.DEL=mean(X3V.REGION.trimmed.nt.nb, na.rm=T), - P1=mean(P3V.nt.nb, na.rm=T), - N1=mean(N1.REGION.nt.nb, na.rm=T), - P2=mean(P5D.nt.nb, na.rm=T), - DEL.DH=mean(X5D.REGION.trimmed.nt.nb, na.rm=T), - DH.DEL=mean(X3D.REGION.trimmed.nt.nb, na.rm=T), - P3=mean(P3D.nt.nb, na.rm=T), - N2=mean(N2.REGION.nt.nb, na.rm=T), - P4=mean(P5J.nt.nb, na.rm=T), - DEL.JH=mean(X5J.REGION.trimmed.nt.nb, na.rm=T), - Total.Del=( mean(X3V.REGION.trimmed.nt.nb, na.rm=T) + - mean(X5D.REGION.trimmed.nt.nb, na.rm=T) + - mean(X3D.REGION.trimmed.nt.nb, na.rm=T) + - mean(X5J.REGION.trimmed.nt.nb, na.rm=T)), - - Total.N=( mean(N1.REGION.nt.nb, na.rm=T) + - mean(N2.REGION.nt.nb, na.rm=T)), - - Total.P=( mean(P3V.nt.nb, na.rm=T) + - mean(P5D.nt.nb, na.rm=T) + - mean(P3D.nt.nb, na.rm=T) + - mean(P5J.nt.nb, na.rm=T))), - by=c("Sample")]) + newData = data.frame(data.table(UNPROD)[,list(unique=.N, + VH.DEL=mean(X3V.REGION.trimmed.nt.nb, na.rm=T), + P1=mean(P3V.nt.nb, na.rm=T), + N1=mean(N1.REGION.nt.nb, na.rm=T), + P2=mean(P5D.nt.nb, na.rm=T), + DEL.DH=mean(X5D.REGION.trimmed.nt.nb, na.rm=T), + DH.DEL=mean(X3D.REGION.trimmed.nt.nb, na.rm=T), + P3=mean(P3D.nt.nb, na.rm=T), + N2=mean(N2.REGION.nt.nb, na.rm=T), + P4=mean(P5J.nt.nb, na.rm=T), + DEL.JH=mean(X5J.REGION.trimmed.nt.nb, na.rm=T), + Total.Del=( mean(X3V.REGION.trimmed.nt.nb, na.rm=T) + + mean(X5D.REGION.trimmed.nt.nb, na.rm=T) + + mean(X3D.REGION.trimmed.nt.nb, na.rm=T) + + mean(X5J.REGION.trimmed.nt.nb, na.rm=T)), + + Total.N=( mean(N1.REGION.nt.nb, na.rm=T) + + mean(N2.REGION.nt.nb, na.rm=T)), + + Total.P=( mean(P3V.nt.nb, na.rm=T) + + mean(P5D.nt.nb, na.rm=T) + + mean(P3D.nt.nb, na.rm=T) + + mean(P5J.nt.nb, na.rm=T))), + by=c("Sample")]) write.table(newData, "junctionAnalysisUnProd.csv" , sep=",",quote=F,na="-",row.names=F,col.names=F) }