| 3 | 1 # Author: jfmartin | 
|  | 2 # Modified by : mpetera | 
|  | 3 ############################################################################### | 
|  | 4 # Correction of analytical effects inter and intra batch on intensities using quality control pooled samples (QC-pools) | 
|  | 5 # according to the algorithm mentioned by Van der Kloet (J Prot Res 2009). | 
|  | 6 # Parameters : a dataframe of Ions intensities and an other of samples? metadata which must contains at least the three following columns : | 
|  | 7 #   "batch" to identify the batches of analyses ; need at least 3 QC-pools for linear adjustment and 8 for lo(w)ess adjustment | 
|  | 8 #   "injectionOrder" integer defining the injection order of all samples : QC-pools and analysed samples | 
|  | 9 #   "sampleType" indicates if defining a sample with "sample" or a QC-pool with "pool" | 
|  | 10 # NO MISSING DATA are allowed | 
|  | 11 # Version 0.91 insertion of ok_norm function to assess correction feasibility | 
|  | 12 # Version 0.92 insertion of slope test in ok_norm | 
|  | 13 # Version 0.93 name of log file define as a parameter of the correction function | 
|  | 14 # Version 0.94 Within a batch, test if all QCpools or samples values = 0. Definition of an error code in ok_norm function (see function for details) | 
|  | 15 # Version 0.99 include non linear lowess correction. | 
|  | 16 # Version 1.00 the corrected result matrix is return transposed in Galaxy | 
|  | 17 # Version 1.01 standard deviation=0 instead of sum of value=0 is used to assess constant data in ok_norm function. Negative values in corrected matrix are converted to 0. | 
|  | 18 # Version 1.02 plotsituation create a result file with the error code of non execution of correction set by function ok_norm | 
|  | 19 # Version 1.03 fix bug in plot with "reg" option. suppression of ok_norm=4 condition if ok_norm function | 
|  | 20 # Version 2.00 Addition of loess function, correction indicator, plots ; modification of returned objects' format, some plots' displays and ok_norm ifelse format | 
|  | 21 # Version 2.01 Correction for pools negative values earlier in norm_QCpool | 
|  | 22 # Version 2.10 Script refreshing ; vocabulary adjustment ; span in parameters for lo(w)ess regression ; conditionning for third line ACP display ; order in loess display | 
|  | 23 # Version 2.11 ok1 and ok2 permutation (ok_norm) ; conditional display of regression (plotsituation) ; grouping of linked lignes + conditioning (normX) ; conditioning for CVplot | 
|  | 24 # Version 2.20 acplight function added from previous toolBox.R [# Version 1.01 "NA"-coding possibility added in acplight function] | 
|  | 25 # Version 2.30 addition of suppressWarnings() for known and controlled warnings ; suppression of one useless "cat" message ; change in Rdata names ; 'batch(es)' in cat | 
|  | 26 # Version 2.90 change in handling of generated negative and Inf values | 
|  | 27 # Version 2.91 Plot improvement | 
|  | 28 | 
|  | 29 ok_norm=function(qcp,qci,spl,spi,method) { | 
|  | 30   # Function used for one ion within one batch to determine whether or not batch correction is possible | 
|  | 31   # ok_norm values : | 
|  | 32   #   0 : no preliminary-condition problem | 
|  | 33   #   1 : standard deviation of QC-pools or samples = 0 | 
|  | 34   #   2 : insufficient number of QC-pools within a batch (n=3 for linear, n=8 for lowess or loess) | 
|  | 35   #   3 : significant difference between QC-pools' and samples' means | 
|  | 36   #   4 : denominator =0 when on 1 pool per batch <> 0 | 
|  | 37   #   5 : (linear regression only) the slopes ratio ?QC-pools/samples? is lower than -0.2 | 
|  | 38 | 
|  | 39   ok=0 | 
|  | 40   if (method=="linear") {minQC=3} else {minQC=8} | 
|  | 41   if (length(qcp)<minQC) { ok=2 | 
|  | 42   } else { | 
|  | 43     if (sd(qcp)==0 | sd(spl)==0) { ok=1 | 
|  | 44     } else { | 
|  | 45     cvp= sd(qcp)/mean(qcp); cvs=sd(spl)/mean(spl) | 
|  | 46     rttest=t.test(qcp,y=spl) | 
|  | 47     reslsfit=lsfit(qci, qcp) | 
|  | 48     reslsfitSample=lsfit(spl, spi) | 
|  | 49     ordori=reslsfit$coefficients[1] | 
|  | 50     penteB=reslsfit$coefficients[2] | 
|  | 51     penteS=reslsfitSample$coefficients[2] | 
|  | 52     # Significant difference between samples and pools | 
|  | 53     if (rttest$p.value < 0.01) { ok=3 | 
|  | 54     } else { | 
|  | 55       # to avoid denominator =0 when on 1 pool per batch <> 0 | 
|  | 56       if (method=="linear" & length(which(((penteB*qci)+ordori)==0))>0 ){ ok=6 | 
|  | 57       } else { | 
|  | 58       # different sloop between samples and pools | 
|  | 59       if (method=="linear" & penteB/penteS < -0.20) { ok=5 } | 
|  | 60   }}}} | 
|  | 61   ok_norm=ok | 
|  | 62 } | 
|  | 63 | 
|  | 64 plotsituation <- function (x, nbid,outfic="plot_regression.pdf", outres="PreNormSummary.txt",fact=args$batch_col_name,span="none") { | 
|  | 65     #	Check for all ions in every batch if linear or lo(w)ess correction is possible. | 
|  | 66     #	Use ok_norm function and create a file (PreNormSummary.txt) with the error code. | 
|  | 67     #	Also create a pdf file with plots of linear and lo(w)ess regression lines. | 
|  | 68     #	x: dataframe with ions in columns and samples in rows ; x is the result of concatenation of sample metadata file and ions file | 
|  | 69     #	nbid: number of samples description columns (id and factors) with at least : "batch","injectionOrder","sampleType" | 
|  | 70     #	outfic: name of regression plots pdf file | 
|  | 71     #	fact: factor to be used as categorical variable for plots and PCA. | 
|  | 72     indfact	=which(dimnames(x)[[2]]==fact) | 
|  | 73     indtypsamp	=which(dimnames(x)[[2]]==args$sample_type_col_name) | 
|  | 74     indbatch	=which(dimnames(x)[[2]]==args$batch_col_name) | 
|  | 75     indinject	=which(dimnames(x)[[2]]==args$injection_order_col_name) | 
|  | 76     lastIon=dim(x)[2] | 
|  | 77     nbi=lastIon-nbid # Number of ions = total number of columns - number of identifying columns | 
|  | 78     nbb=length(levels(x[[args$batch_col_name]])) # Number of batch = number of levels of "batch" comlumn (factor) | 
|  | 79     nbs=length(x[[args$sample_type_col_name]][x[[args$sample_type_col_name]]==args$sample_type_tags$sample])# Number of samples = number of rows with "sample" value in sampleType | 
|  | 80     pdf(outfic,width=27,height=7*ceiling((nbb+2)/3)) | 
|  | 81     cat(nbi," ions ",nbb," batch(es) \n") | 
|  | 82     cv=data.frame(matrix(0,nrow=nbi,ncol=2))# initialisation de la dataset qui contiendra les CV | 
|  | 83     pre_bilan=matrix(0,nrow=nbi,ncol=3*nbb) # dataset of ok_norm function results | 
|  | 84     for (p in 1:nbi) {# for each ion | 
|  | 85         par (mfrow=c(ceiling((nbb+2)/3),3),ask=F,cex=1.2) | 
|  | 86         labion=dimnames(x)[[2]][p+nbid] | 
|  | 87         indpool=which(x[[args$sample_type_col_name]]==args$sample_type_tags$pool) # QCpools subscripts in x | 
|  | 88         pools1=x[indpool,p+nbid]; cv[p,1]=sd(pools1)/mean(pools1)# CV before correction | 
|  | 89         for (b in 1:nbb) {# for each batch... | 
|  | 90             xb=data.frame(x[(x[[args$batch_col_name]]==levels(x[[args$batch_col_name]])[b]),c(indtypsamp,indinject,p+nbid)]) | 
|  | 91             indpb = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$pool)# QCpools subscripts in the current batch | 
|  | 92             indsp = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample)# samples subscripts in the current batch | 
|  | 93             indbt = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample | xb[[args$sample_type_col_name]]==args$sample_type_tags$pool)# indices de tous les samples d'un batch pools+samples | 
|  | 94             normLinearTest=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="linear") | 
|  | 95             normLoessTest=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="loess") | 
|  | 96             normLowessTest=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="lowess") | 
|  | 97             #cat(dimnames(x)[[2]][p+nbid]," batch ",b," loess ",normLoessTest," linear ",normLinearTest,"\n") | 
|  | 98             pre_bilan[ p,3*b-2]=normLinearTest | 
|  | 99             pre_bilan[ p,3*b-1]=normLoessTest | 
|  | 100             pre_bilan[ p,3*b]=normLowessTest | 
|  | 101           if(length(indpb)>1){ | 
|  | 102             if(span=="none"){span1<-1 ; span2<-2*length(indpool)/nbs}else{span1<-span ; span2<-span} | 
|  | 103             resloess=loess(xb[indpb,3]~xb[indpb,2],span=span1,degree=2,family="gaussian",iterations=4,surface="direct") | 
|  | 104             resloessSample=loess(xb[indsp,3]~xb[indsp,2],span=2*length(indpool)/nbs,degree=2,family="gaussian",iterations=4,surface="direct") | 
|  | 105             reslowess=lowess(xb[indpb,2],xb[indpb,3],f=span2) | 
|  | 106             reslowessSample=lowess(xb[indsp,2],xb[indsp,3]) | 
|  | 107             liminf=min(xb[indbt,3]);limsup=max(xb[indbt,3]) | 
|  | 108             plot(xb[indsp,2],xb[indsp,3],pch=16, main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup)) | 
|  | 109             points(xb[indpb,2], xb[indpb,3],pch=5) | 
|  | 110             points(cbind(resloess$x,resloess$fitted)[order(resloess$x),],type="l",col="green3") | 
|  | 111             points(cbind(resloessSample$x,resloessSample$fitted)[order(resloessSample$x),],type="l",col="green3",lty=2) | 
|  | 112             points(reslowess,type="l",col="red"); points(reslowessSample,type="l",col="red",lty=2) | 
|  | 113             abline(lsfit(xb[indpb,2],xb[indpb,3]),col="blue") | 
|  | 114             abline(lsfit(xb[indsp,2],xb[indsp,3]),lty=2,col="blue") | 
|  | 115             legend("topleft",c("pools","samples"),lty=c(1,2),bty="n") | 
|  | 116             legend("topright",c("linear","lowess","loess"),lty=1,col=c("blue","red","green3"),bty="n") | 
|  | 117           } | 
|  | 118         } | 
|  | 119 # series de plot avant et apres correction | 
|  | 120 minval=min(x[p+nbid]);maxval=max(x[p+nbid]) | 
|  | 121 plot( x[[args$injection_order_col_name]], x[,p+nbid],col=x[[args$batch_col_name]],ylim=c(minval,maxval),ylab=labion, | 
|  | 122       main=paste0("before correction (CV for pools = ",round(cv[p,1],2),")")) | 
|  | 123 suppressWarnings(plot.design( x[c(indtypsamp,indbatch,indfact,p+nbid)],main="factors effect before correction")) | 
|  | 124     } | 
|  | 125 dev.off() | 
|  | 126 pre_bilan=data.frame(pre_bilan) | 
|  | 127 labion=dimnames(x)[[2]][nbid+1:nbi] | 
|  | 128 for (i in 1:nbb) { | 
|  | 129   dimnames(pre_bilan)[[2]][3*i-2]=paste("batch",i,"linear") | 
|  | 130   dimnames(pre_bilan)[[2]][3*i-1]=paste("batch",i,"loess") | 
|  | 131   dimnames(pre_bilan)[[2]][3*i]=paste("batch",i,"lowess") | 
|  | 132 } | 
|  | 133 bilan=data.frame(labion,pre_bilan) | 
|  | 134 write.table(bilan,file=outres,sep="\t",row.names=F,quote=F) | 
|  | 135 } | 
|  | 136 | 
|  | 137 | 
|  | 138 normlowess=function (xb,detail="no",vref=1,b,span=NULL) { | 
|  | 139   #	Correction function applied to 1 ion in 1 batch. Use a lowess regression computed on QC-pools in order to correct samples intensity values | 
|  | 140   #	xb : dataframe for 1 ion in columns and samples in rows. | 
|  | 141   # vref : reference value (average of ion) | 
|  | 142   # b : batch subscript | 
|  | 143   # nbid: number of samples description columns (id and factors) with at least : "batch","injectionOrder","sampleType" | 
|  | 144   indpb = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$pool) # pools subscripts of current batch | 
|  | 145   indsp = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample) # samples of current batch subscripts | 
|  | 146   indbt = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample | xb[[args$sample_type_col_name]]==args$sample_type_tags$pool);# batch subscripts of all samples and QC-pools | 
|  | 147   labion=dimnames(xb)[[2]][3] | 
|  | 148   newval=xb[[3]] # initialisation of corrected values = intial values | 
|  | 149   ind <- 0 # initialisation of correction indicator | 
|  | 150   normTodo=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="lowess") | 
|  | 151   #cat("batch:",b," dim xb=",dim(xb)," ok=",normTodo,"\n") | 
|  | 152   if (normTodo==0) { | 
|  | 153     if(length(span)==0){span2<-2*length(indpb)/length(indsp)}else{span2<-span} | 
|  | 154     reslowess=lowess(xb[indpb,2],xb[indpb,3],f=span2) # lowess regression with QC-pools | 
|  | 155     px=xb[indsp,2];  # vector of injectionOrder values only for samples | 
|  | 156     for(j in 1:length(indbt)) { | 
|  | 157       if (xb[[args$sample_type_col_name]][j]==args$sample_type_tags$pool) { | 
|  | 158         if (reslowess$y[which(indpb==j)]==0) reslowess$y[which(indpb==j)] <- 1 | 
|  | 159         newval[j]=(vref*xb[j,3]) / (reslowess$y[which(indpb==j)])} | 
|  | 160       else { # for samples, the correction value cor correspond to the nearest QCpools | 
|  | 161         cor= reslowess$y[which(abs(reslowess$x-px[which(indsp==j)])==min(abs(reslowess$x - px[which(indsp==j)])))] | 
|  | 162         if (length(cor)>1) {cor=cor[1]} | 
|  | 163         if (cor <= 0) {cor=vref} # no modification of initial value | 
|  | 164         newval[j]=(vref*xb[j,3]) / cor | 
|  | 165       } | 
|  | 166     } | 
|  | 167     if (detail=="reg") { | 
|  | 168       liminf=min(xb[indbt,3]);limsup=max(xb[indbt,3]) | 
|  | 169       plot(xb[indsp,2],xb[indsp,3],pch=16,main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup)) | 
|  | 170       points(xb[indpb,2], xb[indpb,3],pch=5) | 
|  | 171       points(reslowess,type="l",col="red") | 
|  | 172     } | 
|  | 173     ind <- 1 | 
|  | 174   } else {# if ok_norm <> 0 , we perform a correction based on batch samples average | 
|  | 175     moySample=mean(xb[indsp,3]);if (moySample==0) moySample=1 | 
|  | 176     newval[indsp] = (vref*xb[indsp,3])/moySample | 
|  | 177     if(length(indpb)>0){ | 
|  | 178       moypool=mean(xb[indpb,3]) ; if (moypool==0) moypool=1 | 
|  | 179       newval[indpb] = (vref*xb[indpb,3])/moypool | 
|  | 180     } | 
|  | 181   } | 
|  | 182   newval <- list(norm.ion=newval,norm.ind=ind) | 
|  | 183   return(newval) | 
|  | 184 } | 
|  | 185 | 
|  | 186 normlinear <- function (xb,detail="no",vref=1,b,valneg=0) { | 
|  | 187   # Correction function applied to 1 ion in 1 batch. | 
|  | 188   # Use a linear regression computed on QC-pools in order to correct samples intensity values | 
|  | 189   # xb: dataframe with ions in columns and samples in rows; x is a result of concatenation of sample metadata file and ion file | 
|  | 190   # nbid: number of sample description columns (id and factors) with at least "batch", "injectionOrder" and "sampleType" | 
|  | 191   # b: which batch it is | 
|  | 192   # valneg: to determine what to do with generated negative and Inf values | 
|  | 193   indpb = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$pool)# pools subscripts of current batch | 
|  | 194   indsp = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample)# samples of current batch subscripts | 
|  | 195   indbt = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample | xb[[args$sample_type_col_name]]==args$sample_type_tags$pool) # QCpools and samples of current batch subscripts | 
|  | 196   labion=dimnames(xb)[[2]][3] | 
|  | 197   newval=xb[[3]] # initialisation of corrected values = intial values | 
|  | 198   ind <- 0 # initialisation of correction indicator | 
|  | 199   normTodo=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="linear") | 
|  | 200   if (normTodo==0) { | 
|  | 201     ind <- 1 | 
|  | 202     reslsfit=lsfit(xb[indpb,2],xb[indpb,3])       # linear regression for QCpools | 
|  | 203     reslsfitSample=lsfit(xb[indsp,2],xb[indsp,3]) # linear regression for samples | 
|  | 204     ordori=reslsfit$coefficients[1] | 
|  | 205     pente=reslsfit$coefficients[2] | 
|  | 206     if (detail=="reg") { | 
|  | 207       liminf=min(xb[indbt,3]);limsup=max(xb[indbt,3]) | 
|  | 208       plot(xb[indsp,2],xb[indsp,3],pch=16, | 
|  | 209            main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup)) | 
|  | 210       points(xb[indpb,2], xb[indpb,3],pch=5) | 
|  | 211       abline(reslsfit) | 
|  | 212       abline(reslsfitSample,lty=2) | 
|  | 213     } | 
|  | 214     # correction with rescaling of ion global intensity (vref) | 
|  | 215     newval = (vref*xb[indbt,3]) / (pente * (xb[indbt,2]) + ordori) | 
|  | 216 	newval[which((pente * (xb[indbt,2]) + ordori)<1)] <- -1 # to handle cases where 0<denominator<1 | 
|  | 217 	# handling if any negative values (or null denominators) | 
|  | 218 	if(length(which((newval==Inf)|(newval<0)))!=0){ | 
|  | 219 	  toajust <- which((newval==Inf)|(newval<0)) | 
|  | 220 	  if(valneg=="NA"){ | 
|  | 221 	    newval[toajust] <- NA | 
|  | 222 	  } else { | 
|  | 223 	    newval[toajust] <- vref * (xb[indbt,3][toajust]) / mean(xb[indbt,3]) | 
|  | 224     ### Other possibility | 
|  | 225 	##  if(pente>0){ # slope orientation | 
|  | 226 	##    newval[toajust]<-(vref*(xb[indbt,3][toajust]))/(pente*ceiling(-ordori/pente+1.00001)+ordori) | 
|  | 227 	##  }else{ | 
|  | 228 	##    newval[toajust]<-(vref*(xb[indbt,3][toajust]))/(pente*floor(-ordori/pente-1.00001)+ordori) | 
|  | 229 	##  } | 
|  | 230 	  } | 
|  | 231 	} | 
|  | 232   } else {# if ok_norm!=0 , we perform a correction based on batch samples average. | 
|  | 233     moySample=mean(xb[indsp,3]); if (moySample==0) moySample=1 | 
|  | 234     newval[indsp] = (vref*xb[indsp,3])/moySample | 
|  | 235     if(length(indpb)>0){ | 
|  | 236       moypool=mean(xb[indpb,3]) ; if (moypool==0) moypool=1 | 
|  | 237       newval[indpb] = (vref*xb[indpb,3])/moypool | 
|  | 238     } | 
|  | 239   } | 
|  | 240   newval <- list(norm.ion=newval,norm.ind=ind) | 
|  | 241   return(newval) | 
|  | 242 } | 
|  | 243 | 
|  | 244 | 
|  | 245 normloess <- function (xb,detail="no",vref=1,b,span=NULL) { | 
|  | 246     #	Correction function applied to 1 ion in 1 batch. | 
|  | 247     #   Use a loess regression computed on QC-pools in order to correct samples intensity values. | 
|  | 248     #	xb : dataframe for 1 ion in columns and samples in rows. | 
|  | 249     #   detail : level of detail in the outlog file. | 
|  | 250     #   vref : reference value (average of ion) | 
|  | 251     #   b : batch subscript | 
|  | 252     indpb = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$pool) # pools subscripts of current batch | 
|  | 253     indsp = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample) # samples of current batch subscripts | 
|  | 254     indbt = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample | xb[[args$sample_type_col_name]]==args$sample_type_tags$pool);# batch subscripts of all samples and QCpools | 
|  | 255     labion=dimnames(xb)[[2]][3] | 
|  | 256     newval=xb[[3]] # initialisation of corrected values = intial values | 
|  | 257     ind <- 0 # initialisation of correction indicator | 
|  | 258     normTodo=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="loess") | 
|  | 259     #cat("batch:",b," dim xb=",dim(xb)," ok=",normTodo,"\n") | 
|  | 260     if (normTodo==0) { | 
|  | 261         if(length(span)==0){span1<-1}else{span1<-span} | 
|  | 262         resloess=loess(xb[indpb,3]~xb[indpb,2],span=span1,degree=2,family="gaussian",iterations=4,surface="direct") # loess regression with QCpools | 
|  | 263         cor=predict(resloess,newdata=xb[,2]) | 
|  | 264 		    cor[cor<=1] <- 1 | 
|  | 265         newval=(vref*xb[,3]) / cor | 
|  | 266         if(length(which(newval>3*(quantile(newval)[4])))>0){ # in this case no modification of initial value | 
|  | 267 			newval <- xb[,3]} else {ind <- 1} # confirmation of correction | 
|  | 268         if ((detail=="reg")&(ind==1)) { # plot | 
|  | 269             liminf=min(xb[indbt,3]);limsup=max(xb[indbt,3]) | 
|  | 270             plot(xb[indsp,2],xb[indsp,3],pch=16,main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup)) | 
|  | 271             points(xb[indpb,2], xb[indpb,3],pch=5) | 
|  | 272             points(cbind(resloess$x,resloess$fitted)[order(resloess$x),],type="l",col="red") | 
|  | 273         } | 
|  | 274     } | 
|  | 275     if (ind==0) {# if ok_norm != 0 or if correction creates outliers, we perform a correction based on batch samples average | 
|  | 276       moySample=mean(xb[indsp,3]);if (moySample==0) moySample=1 | 
|  | 277       newval[indsp] = (vref*xb[indsp,3])/moySample | 
|  | 278       if(length(indpb)>0){ | 
|  | 279         moypool=mean(xb[indpb,3]) ; if (moypool==0) moypool=1 | 
|  | 280         newval[indpb] = (vref*xb[indpb,3])/moypool | 
|  | 281       } | 
|  | 282     } | 
|  | 283     newval <- list(norm.ion=newval,norm.ind=ind) | 
|  | 284     return(newval) | 
|  | 285 } | 
|  | 286 | 
|  | 287 | 
|  | 288 | 
|  | 289 norm_QCpool <- function (x, nbid, outlog, fact, metaion, detail="no", NormMoyPool=F, NormInt=F, method="linear",span="none",valNull="0") | 
|  | 290 { | 
|  | 291   ### Correction applying linear or lo(w)ess correction function on all ions for every batch of a dataframe. | 
|  | 292   # x: dataframe with ions in column and samples' metadata | 
|  | 293   # nbid: number of sample description columns (id and factors) with at least "batch", "injectionOrder", "sampleType" | 
|  | 294   # outlog: name of regression plots and PCA pdf file | 
|  | 295   # fact: factor to be used as categorical variable for plots | 
|  | 296   # metaion: dataframe of ions' metadata | 
|  | 297   # detail: level of detail in the outlog file. detail="no" ACP + boxplot of CV before and after correction. | 
|  | 298   #          detail="plot" with plot for all batch before and after correction. | 
|  | 299   #          detail="reg" with added plots with regression lines for all batches. | 
|  | 300   # NormMoyPool: not used | 
|  | 301   # NormInt: not used | 
|  | 302   # method: regression method to be used to correct : "linear" or "lowess" or "loess" | 
|  | 303   # valNull: to determine what to do with negatively estimated intensities | 
|  | 304 	indfact		=which(dimnames(x)[[2]]==fact) | 
|  | 305 	indtypsamp=which(dimnames(x)[[2]]==args$sample_type_col_name) | 
|  | 306 	indbatch	=which(dimnames(x)[[2]]==args$batch_col_name) | 
|  | 307 	indinject	=which(dimnames(x)[[2]]==args$injection_order_col_name) | 
|  | 308 	lastIon=dim(x)[2] | 
|  | 309 	valref=apply(as.matrix(x[,(nbid+1):(lastIon)]),2,mean) # reference value for each ion used to still have the same rought size of values | 
|  | 310 	nbi=lastIon-nbid # number of ions | 
|  | 311 	nbb=length(levels(x[[args$batch_col_name]])) # Number of batch(es) = number of levels of factor "batch" (can be =1) | 
|  | 312 	nbs=length(x[[args$sample_type_col_name]][x[[args$sample_type_col_name]]==args$sample_type_tags$sample])# Number of samples | 
|  | 313 	nbp=length(x[[args$sample_type_col_name]][x[[args$sample_type_col_name]]==args$sample_type_tags$pool])# Number of QCpools | 
|  | 314 	Xn=data.frame(x[,c(1:nbid)],matrix(0,nrow=nbp+nbs,ncol=nbi))# initialisation of the corrected dataframe (=initial dataframe) | 
|  | 315   dimnames(Xn)=dimnames(x) | 
|  | 316 	cv=data.frame(matrix(0,nrow=nbi,ncol=2))# initialisation of dataframe containing CV before and after correction | 
|  | 317 	dimnames(cv)[[2]]=c("avant","apres") | 
|  | 318 	if (detail!="reg" && detail!="plot" && detail!="no") {detail="no"} | 
|  | 319 	pdf(outlog,width=27,height=20) | 
|  | 320 	cat(nbi," ions ",nbb," batch(es) \n") | 
|  | 321 	if (detail=="plot") {if(nbb<6){par(mfrow=c(3,3),ask=F,cex=1.5)}else{par(mfrow=c(4,4),ask=F,cex=1.5)}} | 
|  | 322   res.ind <- matrix(NA,ncol=nbb,nrow=nbi,dimnames=list(dimnames(x)[[2]][-c(1:nbid)],paste("norm.b",1:nbb,sep=""))) | 
|  | 323 	for (p in 1:nbi) {# for each ion | 
|  | 324 	  labion=dimnames(x)[[2]][p+nbid] | 
|  | 325         if (detail == "reg") {if(nbb<6){par(mfrow=c(3,3),ask=F,cex=1.5)}else{par(mfrow=c(4,4),ask=F,cex=1.5)}} | 
|  | 326 		indpool=which(x[[args$sample_type_col_name]]==args$sample_type_tags$pool)# QCpools subscripts in all batches | 
|  | 327 		pools1=x[indpool,p+nbid]; cv[p,1]=sd(pools1)/mean(pools1)# CV before correction | 
|  | 328 		for (b in 1:nbb) {# for every batch | 
|  | 329 			indpb = which(x[[args$batch_col_name]]==levels(x[[args$batch_col_name]])[b] & x[[args$sample_type_col_name]]==args$sample_type_tags$pool)# QCpools subscripts of the current batch | 
|  | 330 			indsp = which(x[[args$batch_col_name]]==levels(x[[args$batch_col_name]])[b] & x[[args$sample_type_col_name]]==args$sample_type_tags$sample)# samples subscripts of the current batch | 
|  | 331       indbt = which(x[[args$batch_col_name]]==levels(x[[args$batch_col_name]])[b] & (x[[args$sample_type_col_name]]==args$sample_type_tags$pool | x[[args$sample_type_col_name]]==args$sample_type_tags$sample)) # subscripts of all samples | 
|  | 332       # cat(dimnames(x)[[2]][p+nbid]," indsp:",length(indsp)," indpb=",length(indpb)," indbt=",length(indbt)," ") | 
|  | 333 			sub=data.frame(x[(x[[args$batch_col_name]]==levels(x[[args$batch_col_name]])[b]),c(indtypsamp,indinject,p+nbid)]) | 
|  | 334 			if (method=="linear") { res.norm = normlinear(sub,detail,valref[p],b,valNull) | 
|  | 335 			} else { if (method=="loess"){ res.norm <- normloess(sub,detail,valref[p],b,span) | 
|  | 336 			  } else { if (method=="lowess"){ res.norm <- normlowess(sub,detail,valref[p],b,span) | 
|  | 337           } else {stop("\n--\nNo valid 'method' argument supplied.\nMust be 'linear','loess' or 'lowess'.\n--\n")} | 
|  | 338 			}} | 
|  | 339 			Xn[indbt,p+nbid] = res.norm[[1]] | 
|  | 340 			res.ind[p,b] <- res.norm[[2]] | 
|  | 341 			# CV batch test : if after normaliszation, CV before < CV after initial values are kept | 
|  | 342 #  			moypoolRaw=mean(x[indpb,p+nbid])  ; if (moypoolRaw==0) moypoolRaw=1 | 
|  | 343 #			moySampleRaw=mean(x[indsp,p+nbid]); if (moySampleRaw==0) moySampleRaw=1 | 
|  | 344 #			moypool=mean(Xn[indpb,p+nbid]) ; if (moypool==0) moypool=1 | 
|  | 345 #			#moySample=mean(Xn[indsp,p+nbid]); if (moySample==0) moySample=1 | 
|  | 346 #			if (sd( Xn[indpb,p+nbid])/moypool>sd(x[indpb,p+nbid])/moypoolRaw) { | 
|  | 347 #					Xn[indpb,p+nbid] = (valref[p]*x[indpb,p+nbid])/moypoolRaw | 
|  | 348 #					Xn[indsp,p+nbid] = (valref[p]*x[indsp,p+nbid])/moySampleRaw | 
|  | 349 #			} | 
|  | 350 		} | 
|  | 351 		Xn[indpool,p+nbid][Xn[indpool,p+nbid]<0] <- 0 | 
|  | 352 		pools2=Xn[indpool,p+nbid]; cv[p,2]=sd(pools2,na.rm=TRUE)/mean(pools2,na.rm=TRUE)# CV apres correction | 
|  | 353 		if (detail=="reg" || detail=="plot" ) { | 
|  | 354 		  	# plot before and after correction | 
|  | 355 		  	minval=min(cbind(x[p+nbid],Xn[p+nbid]),na.rm=TRUE);maxval=max(cbind(x[p+nbid],Xn[p+nbid]),na.rm=TRUE) | 
|  | 356 		  	plot( x[[args$injection_order_col_name]], x[,p+nbid],col=x[[args$batch_col_name]],ylab=labion,ylim=c(minval,maxval), | 
|  | 357               main=paste0("before correction (CV for pools = ",round(cv[p,1],2),")")) | 
|  | 358               points(x[[args$injection_order_col_name]][indpool],x[indpool,p+nbid],col="maroon",pch=".",cex=2) | 
|  | 359 		  	plot(Xn[[args$injection_order_col_name]],Xn[,p+nbid],col=x[[args$batch_col_name]],ylab="",ylim=c(minval,maxval), | 
|  | 360              main=paste0("after correction (CV for pools = ",round(cv[p,2],2),")")) | 
|  | 361               points(Xn[[args$injection_order_col_name]][indpool],Xn[indpool,p+nbid],col="maroon",pch=".",cex=2) | 
|  | 362 		  	suppressWarnings(plot.design( x[c(indtypsamp,indbatch,indfact,p+nbid)],main="factors effect before correction")) | 
|  | 363 		  	suppressWarnings(plot.design(Xn[c(indtypsamp,indbatch,indfact,p+nbid)],main="factors effect after correction")) | 
|  | 364 		} | 
|  | 365 	} | 
|  | 366   ### Replacement of post correction negative values by chosen value | 
|  | 367 	Xnn=Xn | 
|  | 368 	for (i in c((nbid+1):dim(Xn)[2])) { | 
|  | 369 	  cneg=which(Xn[[i]]<0) | 
|  | 370 	  Xnn[[i]]=replace(Xn[[i]],cneg,as.numeric(valNull)) | 
|  | 371 	} | 
|  | 372   Xn=Xnn | 
|  | 373 | 
|  | 374 	if (detail=="reg" || detail=="plot" || detail=="no") { | 
|  | 375 		if (nbi > 3) { | 
|  | 376 			par(mfrow=c(3,4),ask=F,cex=1.2) # PCA Plot before/after, normed only and ions plot | 
|  | 377 			acplight(x[,c(indtypsamp,indbatch,indtypsamp,indfact,(nbid+1):lastIon)],"uv",TRUE) | 
|  | 378 			norm.ion <- which(colnames(Xn)%in%(rownames(res.ind)[which(rowSums(res.ind)>=1)])) | 
|  | 379 			acplight(Xn[,c(indtypsamp,indbatch,indtypsamp,indfact,(nbid+1):lastIon)],"uv",TRUE,norm.ion) | 
|  | 380 			if(length(norm.ion)>0){acplight(Xn[,c(indtypsamp,indbatch,indtypsamp,indfact,norm.ion)],"uv",TRUE)} | 
|  | 381 			par(mfrow=c(1,2),ask=F,cex=1.2) # Before/after boxplot | 
|  | 382 			cvplot=cv[!is.na(cv[[1]])&!is.na(cv[[2]]),] | 
|  | 383       if(nrow(cvplot)>0){ | 
|  | 384 			  boxplot(cvplot[[1]],ylim=c(min(cvplot),max(cvplot)),main="CV before correction") | 
|  | 385         boxplot(cvplot[[2]],ylim=c(min(cvplot),max(cvplot)),main="CV after correction") | 
|  | 386       } | 
|  | 387       dev.off() | 
|  | 388 		} | 
|  | 389 	} | 
|  | 390   if (nbi<=3) {dev.off()} | 
|  | 391   # transposed matrix is return  (format of the initial matrix with ions in rows) | 
|  | 392   Xr=Xn[,-c(1:nbid)]; dimnames(Xr)[[1]]=Xn[[1]] | 
|  | 393   Xr=t(Xr) ; Xr <- data.frame(ions=rownames(Xr),Xr) | 
|  | 394 | 
|  | 395   res.norm[[1]] <- Xr ; res.norm[[2]] <- data.frame(metaion,res.ind) ; res.norm[[3]] <- x[,c(1:nbid)] | 
|  | 396   names(res.norm) <- c("dataMatrix","variableMetadata","sampleMetadata") | 
|  | 397   return(res.norm) | 
|  | 398 } | 
|  | 399 | 
|  | 400 | 
|  | 401 | 
|  | 402 | 
|  | 403 | 
|  | 404 acplight <- function(ids, scaling="uv", indiv=FALSE,indcol=NULL) { | 
|  | 405 	suppressPackageStartupMessages(library(ade4)) | 
|  | 406 	suppressPackageStartupMessages(library(pcaMethods)) | 
|  | 407   # Make a PCA and plot scores and loadings. | 
|  | 408   # First column must contain samples' identifiers. | 
|  | 409   # Columns 2 to 4 contain factors to colour the plots. | 
|  | 410   for (i in 1:3) { | 
|  | 411     idss=ids[which(ids[,i+1]!="NA"),] | 
|  | 412     idss=data.frame(idss[idss[,i+1]!="",]) | 
|  | 413     classe=as.factor(idss[[i+1]]) | 
|  | 414     idsample=as.character(idss[[1]]) | 
|  | 415     colour=1:length(levels(classe)) | 
|  | 416     ions=as.matrix(idss[,5:dim(idss)[2]]) | 
|  | 417 	# Removing ions containing NA (not compatible with standard PCA) | 
|  | 418 	ions=t(na.omit(t(ions))) | 
|  | 419 	if(i==1){if(ncol(ions)!=(ncol(idss)-4)){cat("Note:",(ncol(idss)-4)-ncol(ions),"ions were ignored for PCA display due to NA in intensities.\n")}} | 
|  | 420     # Scaling choice: "uv","none","pareto" | 
|  | 421     object=suppressWarnings(prep(ions, scale=scaling, center=TRUE)) | 
|  | 422 	if(i==1){if(length(which(apply(ions,2,var)==0))>0){cat("Warning: there are",length(which(apply(ions,2,var)==0)),"constant ions.\n")}} | 
|  | 423     # ALGO: nipals,svdImpute, Bayesian, svd, probalistic=F | 
|  | 424     result <- pca(object, center=F, method="svd", nPcs=2) | 
|  | 425     # ADE4 : to plot samples' ellipsoid for each class | 
|  | 426     s.class(result@scores, classe, cpoint = 1,xax=1,yax=2,col=colour,sub=sprintf("Scores - PCs %sx%s",1,2), possub="bottomright") | 
|  | 427     #s.label(result@loadings,label = ions, cpoint = 0, clabel=0.4, xax=1,yax=2,sub="Loadings",possub="bottomright") | 
|  | 428     if(i==1){resulti <- result} | 
|  | 429   } | 
|  | 430   if(indiv) { | 
|  | 431     colour <- rep("darkblue",length(resulti@loadings)) ; if(!is.null(indcol)) {colour[-c(indcol)] <- "red"} | 
|  | 432     plot(resulti@loadings,col=colour,main="Loadings",xaxt="n",yaxt="n",pch=20, | 
|  | 433          xlab=bquote(PC1-R^2==.(resulti@R2[1])),ylab=bquote(PC2 - R^2 == .(resulti@R2[2]))) | 
|  | 434     abline(h=0,v=0)} | 
|  | 435 } | 
|  | 436 | 
|  | 437 |