Mercurial > repos > ethevenot > batchcorrection
comparison BC/Normalisation_QCpool.r @ 3:2e3a23dd6c24 draft default tip
Uploaded
| author | melpetera | 
|---|---|
| date | Thu, 28 Feb 2019 05:12:34 -0500 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 2:57edfd3943ab | 3:2e3a23dd6c24 | 
|---|---|
| 1 # Author: jfmartin | |
| 2 # Modified by : mpetera | |
| 3 ############################################################################### | |
| 4 # Correction of analytical effects inter and intra batch on intensities using quality control pooled samples (QC-pools) | |
| 5 # according to the algorithm mentioned by Van der Kloet (J Prot Res 2009). | |
| 6 # Parameters : a dataframe of Ions intensities and an other of samples? metadata which must contains at least the three following columns : | |
| 7 # "batch" to identify the batches of analyses ; need at least 3 QC-pools for linear adjustment and 8 for lo(w)ess adjustment | |
| 8 # "injectionOrder" integer defining the injection order of all samples : QC-pools and analysed samples | |
| 9 # "sampleType" indicates if defining a sample with "sample" or a QC-pool with "pool" | |
| 10 # NO MISSING DATA are allowed | |
| 11 # Version 0.91 insertion of ok_norm function to assess correction feasibility | |
| 12 # Version 0.92 insertion of slope test in ok_norm | |
| 13 # Version 0.93 name of log file define as a parameter of the correction function | |
| 14 # Version 0.94 Within a batch, test if all QCpools or samples values = 0. Definition of an error code in ok_norm function (see function for details) | |
| 15 # Version 0.99 include non linear lowess correction. | |
| 16 # Version 1.00 the corrected result matrix is return transposed in Galaxy | |
| 17 # Version 1.01 standard deviation=0 instead of sum of value=0 is used to assess constant data in ok_norm function. Negative values in corrected matrix are converted to 0. | |
| 18 # Version 1.02 plotsituation create a result file with the error code of non execution of correction set by function ok_norm | |
| 19 # Version 1.03 fix bug in plot with "reg" option. suppression of ok_norm=4 condition if ok_norm function | |
| 20 # Version 2.00 Addition of loess function, correction indicator, plots ; modification of returned objects' format, some plots' displays and ok_norm ifelse format | |
| 21 # Version 2.01 Correction for pools negative values earlier in norm_QCpool | |
| 22 # Version 2.10 Script refreshing ; vocabulary adjustment ; span in parameters for lo(w)ess regression ; conditionning for third line ACP display ; order in loess display | |
| 23 # Version 2.11 ok1 and ok2 permutation (ok_norm) ; conditional display of regression (plotsituation) ; grouping of linked lignes + conditioning (normX) ; conditioning for CVplot | |
| 24 # Version 2.20 acplight function added from previous toolBox.R [# Version 1.01 "NA"-coding possibility added in acplight function] | |
| 25 # Version 2.30 addition of suppressWarnings() for known and controlled warnings ; suppression of one useless "cat" message ; change in Rdata names ; 'batch(es)' in cat | |
| 26 # Version 2.90 change in handling of generated negative and Inf values | |
| 27 # Version 2.91 Plot improvement | |
| 28 | |
| 29 ok_norm=function(qcp,qci,spl,spi,method) { | |
| 30 # Function used for one ion within one batch to determine whether or not batch correction is possible | |
| 31 # ok_norm values : | |
| 32 # 0 : no preliminary-condition problem | |
| 33 # 1 : standard deviation of QC-pools or samples = 0 | |
| 34 # 2 : insufficient number of QC-pools within a batch (n=3 for linear, n=8 for lowess or loess) | |
| 35 # 3 : significant difference between QC-pools' and samples' means | |
| 36 # 4 : denominator =0 when on 1 pool per batch <> 0 | |
| 37 # 5 : (linear regression only) the slopes ratio ?QC-pools/samples? is lower than -0.2 | |
| 38 | |
| 39 ok=0 | |
| 40 if (method=="linear") {minQC=3} else {minQC=8} | |
| 41 if (length(qcp)<minQC) { ok=2 | |
| 42 } else { | |
| 43 if (sd(qcp)==0 | sd(spl)==0) { ok=1 | |
| 44 } else { | |
| 45 cvp= sd(qcp)/mean(qcp); cvs=sd(spl)/mean(spl) | |
| 46 rttest=t.test(qcp,y=spl) | |
| 47 reslsfit=lsfit(qci, qcp) | |
| 48 reslsfitSample=lsfit(spl, spi) | |
| 49 ordori=reslsfit$coefficients[1] | |
| 50 penteB=reslsfit$coefficients[2] | |
| 51 penteS=reslsfitSample$coefficients[2] | |
| 52 # Significant difference between samples and pools | |
| 53 if (rttest$p.value < 0.01) { ok=3 | |
| 54 } else { | |
| 55 # to avoid denominator =0 when on 1 pool per batch <> 0 | |
| 56 if (method=="linear" & length(which(((penteB*qci)+ordori)==0))>0 ){ ok=6 | |
| 57 } else { | |
| 58 # different sloop between samples and pools | |
| 59 if (method=="linear" & penteB/penteS < -0.20) { ok=5 } | |
| 60 }}}} | |
| 61 ok_norm=ok | |
| 62 } | |
| 63 | |
| 64 plotsituation <- function (x, nbid,outfic="plot_regression.pdf", outres="PreNormSummary.txt",fact=args$batch_col_name,span="none") { | |
| 65 # Check for all ions in every batch if linear or lo(w)ess correction is possible. | |
| 66 # Use ok_norm function and create a file (PreNormSummary.txt) with the error code. | |
| 67 # Also create a pdf file with plots of linear and lo(w)ess regression lines. | |
| 68 # x: dataframe with ions in columns and samples in rows ; x is the result of concatenation of sample metadata file and ions file | |
| 69 # nbid: number of samples description columns (id and factors) with at least : "batch","injectionOrder","sampleType" | |
| 70 # outfic: name of regression plots pdf file | |
| 71 # fact: factor to be used as categorical variable for plots and PCA. | |
| 72 indfact =which(dimnames(x)[[2]]==fact) | |
| 73 indtypsamp =which(dimnames(x)[[2]]==args$sample_type_col_name) | |
| 74 indbatch =which(dimnames(x)[[2]]==args$batch_col_name) | |
| 75 indinject =which(dimnames(x)[[2]]==args$injection_order_col_name) | |
| 76 lastIon=dim(x)[2] | |
| 77 nbi=lastIon-nbid # Number of ions = total number of columns - number of identifying columns | |
| 78 nbb=length(levels(x[[args$batch_col_name]])) # Number of batch = number of levels of "batch" comlumn (factor) | |
| 79 nbs=length(x[[args$sample_type_col_name]][x[[args$sample_type_col_name]]==args$sample_type_tags$sample])# Number of samples = number of rows with "sample" value in sampleType | |
| 80 pdf(outfic,width=27,height=7*ceiling((nbb+2)/3)) | |
| 81 cat(nbi," ions ",nbb," batch(es) \n") | |
| 82 cv=data.frame(matrix(0,nrow=nbi,ncol=2))# initialisation de la dataset qui contiendra les CV | |
| 83 pre_bilan=matrix(0,nrow=nbi,ncol=3*nbb) # dataset of ok_norm function results | |
| 84 for (p in 1:nbi) {# for each ion | |
| 85 par (mfrow=c(ceiling((nbb+2)/3),3),ask=F,cex=1.2) | |
| 86 labion=dimnames(x)[[2]][p+nbid] | |
| 87 indpool=which(x[[args$sample_type_col_name]]==args$sample_type_tags$pool) # QCpools subscripts in x | |
| 88 pools1=x[indpool,p+nbid]; cv[p,1]=sd(pools1)/mean(pools1)# CV before correction | |
| 89 for (b in 1:nbb) {# for each batch... | |
| 90 xb=data.frame(x[(x[[args$batch_col_name]]==levels(x[[args$batch_col_name]])[b]),c(indtypsamp,indinject,p+nbid)]) | |
| 91 indpb = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$pool)# QCpools subscripts in the current batch | |
| 92 indsp = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample)# samples subscripts in the current batch | |
| 93 indbt = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample | xb[[args$sample_type_col_name]]==args$sample_type_tags$pool)# indices de tous les samples d'un batch pools+samples | |
| 94 normLinearTest=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="linear") | |
| 95 normLoessTest=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="loess") | |
| 96 normLowessTest=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="lowess") | |
| 97 #cat(dimnames(x)[[2]][p+nbid]," batch ",b," loess ",normLoessTest," linear ",normLinearTest,"\n") | |
| 98 pre_bilan[ p,3*b-2]=normLinearTest | |
| 99 pre_bilan[ p,3*b-1]=normLoessTest | |
| 100 pre_bilan[ p,3*b]=normLowessTest | |
| 101 if(length(indpb)>1){ | |
| 102 if(span=="none"){span1<-1 ; span2<-2*length(indpool)/nbs}else{span1<-span ; span2<-span} | |
| 103 resloess=loess(xb[indpb,3]~xb[indpb,2],span=span1,degree=2,family="gaussian",iterations=4,surface="direct") | |
| 104 resloessSample=loess(xb[indsp,3]~xb[indsp,2],span=2*length(indpool)/nbs,degree=2,family="gaussian",iterations=4,surface="direct") | |
| 105 reslowess=lowess(xb[indpb,2],xb[indpb,3],f=span2) | |
| 106 reslowessSample=lowess(xb[indsp,2],xb[indsp,3]) | |
| 107 liminf=min(xb[indbt,3]);limsup=max(xb[indbt,3]) | |
| 108 plot(xb[indsp,2],xb[indsp,3],pch=16, main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup)) | |
| 109 points(xb[indpb,2], xb[indpb,3],pch=5) | |
| 110 points(cbind(resloess$x,resloess$fitted)[order(resloess$x),],type="l",col="green3") | |
| 111 points(cbind(resloessSample$x,resloessSample$fitted)[order(resloessSample$x),],type="l",col="green3",lty=2) | |
| 112 points(reslowess,type="l",col="red"); points(reslowessSample,type="l",col="red",lty=2) | |
| 113 abline(lsfit(xb[indpb,2],xb[indpb,3]),col="blue") | |
| 114 abline(lsfit(xb[indsp,2],xb[indsp,3]),lty=2,col="blue") | |
| 115 legend("topleft",c("pools","samples"),lty=c(1,2),bty="n") | |
| 116 legend("topright",c("linear","lowess","loess"),lty=1,col=c("blue","red","green3"),bty="n") | |
| 117 } | |
| 118 } | |
| 119 # series de plot avant et apres correction | |
| 120 minval=min(x[p+nbid]);maxval=max(x[p+nbid]) | |
| 121 plot( x[[args$injection_order_col_name]], x[,p+nbid],col=x[[args$batch_col_name]],ylim=c(minval,maxval),ylab=labion, | |
| 122 main=paste0("before correction (CV for pools = ",round(cv[p,1],2),")")) | |
| 123 suppressWarnings(plot.design( x[c(indtypsamp,indbatch,indfact,p+nbid)],main="factors effect before correction")) | |
| 124 } | |
| 125 dev.off() | |
| 126 pre_bilan=data.frame(pre_bilan) | |
| 127 labion=dimnames(x)[[2]][nbid+1:nbi] | |
| 128 for (i in 1:nbb) { | |
| 129 dimnames(pre_bilan)[[2]][3*i-2]=paste("batch",i,"linear") | |
| 130 dimnames(pre_bilan)[[2]][3*i-1]=paste("batch",i,"loess") | |
| 131 dimnames(pre_bilan)[[2]][3*i]=paste("batch",i,"lowess") | |
| 132 } | |
| 133 bilan=data.frame(labion,pre_bilan) | |
| 134 write.table(bilan,file=outres,sep="\t",row.names=F,quote=F) | |
| 135 } | |
| 136 | |
| 137 | |
| 138 normlowess=function (xb,detail="no",vref=1,b,span=NULL) { | |
| 139 # Correction function applied to 1 ion in 1 batch. Use a lowess regression computed on QC-pools in order to correct samples intensity values | |
| 140 # xb : dataframe for 1 ion in columns and samples in rows. | |
| 141 # vref : reference value (average of ion) | |
| 142 # b : batch subscript | |
| 143 # nbid: number of samples description columns (id and factors) with at least : "batch","injectionOrder","sampleType" | |
| 144 indpb = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$pool) # pools subscripts of current batch | |
| 145 indsp = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample) # samples of current batch subscripts | |
| 146 indbt = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample | xb[[args$sample_type_col_name]]==args$sample_type_tags$pool);# batch subscripts of all samples and QC-pools | |
| 147 labion=dimnames(xb)[[2]][3] | |
| 148 newval=xb[[3]] # initialisation of corrected values = intial values | |
| 149 ind <- 0 # initialisation of correction indicator | |
| 150 normTodo=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="lowess") | |
| 151 #cat("batch:",b," dim xb=",dim(xb)," ok=",normTodo,"\n") | |
| 152 if (normTodo==0) { | |
| 153 if(length(span)==0){span2<-2*length(indpb)/length(indsp)}else{span2<-span} | |
| 154 reslowess=lowess(xb[indpb,2],xb[indpb,3],f=span2) # lowess regression with QC-pools | |
| 155 px=xb[indsp,2]; # vector of injectionOrder values only for samples | |
| 156 for(j in 1:length(indbt)) { | |
| 157 if (xb[[args$sample_type_col_name]][j]==args$sample_type_tags$pool) { | |
| 158 if (reslowess$y[which(indpb==j)]==0) reslowess$y[which(indpb==j)] <- 1 | |
| 159 newval[j]=(vref*xb[j,3]) / (reslowess$y[which(indpb==j)])} | |
| 160 else { # for samples, the correction value cor correspond to the nearest QCpools | |
| 161 cor= reslowess$y[which(abs(reslowess$x-px[which(indsp==j)])==min(abs(reslowess$x - px[which(indsp==j)])))] | |
| 162 if (length(cor)>1) {cor=cor[1]} | |
| 163 if (cor <= 0) {cor=vref} # no modification of initial value | |
| 164 newval[j]=(vref*xb[j,3]) / cor | |
| 165 } | |
| 166 } | |
| 167 if (detail=="reg") { | |
| 168 liminf=min(xb[indbt,3]);limsup=max(xb[indbt,3]) | |
| 169 plot(xb[indsp,2],xb[indsp,3],pch=16,main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup)) | |
| 170 points(xb[indpb,2], xb[indpb,3],pch=5) | |
| 171 points(reslowess,type="l",col="red") | |
| 172 } | |
| 173 ind <- 1 | |
| 174 } else {# if ok_norm <> 0 , we perform a correction based on batch samples average | |
| 175 moySample=mean(xb[indsp,3]);if (moySample==0) moySample=1 | |
| 176 newval[indsp] = (vref*xb[indsp,3])/moySample | |
| 177 if(length(indpb)>0){ | |
| 178 moypool=mean(xb[indpb,3]) ; if (moypool==0) moypool=1 | |
| 179 newval[indpb] = (vref*xb[indpb,3])/moypool | |
| 180 } | |
| 181 } | |
| 182 newval <- list(norm.ion=newval,norm.ind=ind) | |
| 183 return(newval) | |
| 184 } | |
| 185 | |
| 186 normlinear <- function (xb,detail="no",vref=1,b,valneg=0) { | |
| 187 # Correction function applied to 1 ion in 1 batch. | |
| 188 # Use a linear regression computed on QC-pools in order to correct samples intensity values | |
| 189 # xb: dataframe with ions in columns and samples in rows; x is a result of concatenation of sample metadata file and ion file | |
| 190 # nbid: number of sample description columns (id and factors) with at least "batch", "injectionOrder" and "sampleType" | |
| 191 # b: which batch it is | |
| 192 # valneg: to determine what to do with generated negative and Inf values | |
| 193 indpb = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$pool)# pools subscripts of current batch | |
| 194 indsp = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample)# samples of current batch subscripts | |
| 195 indbt = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample | xb[[args$sample_type_col_name]]==args$sample_type_tags$pool) # QCpools and samples of current batch subscripts | |
| 196 labion=dimnames(xb)[[2]][3] | |
| 197 newval=xb[[3]] # initialisation of corrected values = intial values | |
| 198 ind <- 0 # initialisation of correction indicator | |
| 199 normTodo=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="linear") | |
| 200 if (normTodo==0) { | |
| 201 ind <- 1 | |
| 202 reslsfit=lsfit(xb[indpb,2],xb[indpb,3]) # linear regression for QCpools | |
| 203 reslsfitSample=lsfit(xb[indsp,2],xb[indsp,3]) # linear regression for samples | |
| 204 ordori=reslsfit$coefficients[1] | |
| 205 pente=reslsfit$coefficients[2] | |
| 206 if (detail=="reg") { | |
| 207 liminf=min(xb[indbt,3]);limsup=max(xb[indbt,3]) | |
| 208 plot(xb[indsp,2],xb[indsp,3],pch=16, | |
| 209 main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup)) | |
| 210 points(xb[indpb,2], xb[indpb,3],pch=5) | |
| 211 abline(reslsfit) | |
| 212 abline(reslsfitSample,lty=2) | |
| 213 } | |
| 214 # correction with rescaling of ion global intensity (vref) | |
| 215 newval = (vref*xb[indbt,3]) / (pente * (xb[indbt,2]) + ordori) | |
| 216 newval[which((pente * (xb[indbt,2]) + ordori)<1)] <- -1 # to handle cases where 0<denominator<1 | |
| 217 # handling if any negative values (or null denominators) | |
| 218 if(length(which((newval==Inf)|(newval<0)))!=0){ | |
| 219 toajust <- which((newval==Inf)|(newval<0)) | |
| 220 if(valneg=="NA"){ | |
| 221 newval[toajust] <- NA | |
| 222 } else { | |
| 223 newval[toajust] <- vref * (xb[indbt,3][toajust]) / mean(xb[indbt,3]) | |
| 224 ### Other possibility | |
| 225 ## if(pente>0){ # slope orientation | |
| 226 ## newval[toajust]<-(vref*(xb[indbt,3][toajust]))/(pente*ceiling(-ordori/pente+1.00001)+ordori) | |
| 227 ## }else{ | |
| 228 ## newval[toajust]<-(vref*(xb[indbt,3][toajust]))/(pente*floor(-ordori/pente-1.00001)+ordori) | |
| 229 ## } | |
| 230 } | |
| 231 } | |
| 232 } else {# if ok_norm!=0 , we perform a correction based on batch samples average. | |
| 233 moySample=mean(xb[indsp,3]); if (moySample==0) moySample=1 | |
| 234 newval[indsp] = (vref*xb[indsp,3])/moySample | |
| 235 if(length(indpb)>0){ | |
| 236 moypool=mean(xb[indpb,3]) ; if (moypool==0) moypool=1 | |
| 237 newval[indpb] = (vref*xb[indpb,3])/moypool | |
| 238 } | |
| 239 } | |
| 240 newval <- list(norm.ion=newval,norm.ind=ind) | |
| 241 return(newval) | |
| 242 } | |
| 243 | |
| 244 | |
| 245 normloess <- function (xb,detail="no",vref=1,b,span=NULL) { | |
| 246 # Correction function applied to 1 ion in 1 batch. | |
| 247 # Use a loess regression computed on QC-pools in order to correct samples intensity values. | |
| 248 # xb : dataframe for 1 ion in columns and samples in rows. | |
| 249 # detail : level of detail in the outlog file. | |
| 250 # vref : reference value (average of ion) | |
| 251 # b : batch subscript | |
| 252 indpb = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$pool) # pools subscripts of current batch | |
| 253 indsp = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample) # samples of current batch subscripts | |
| 254 indbt = which(xb[[args$sample_type_col_name]]==args$sample_type_tags$sample | xb[[args$sample_type_col_name]]==args$sample_type_tags$pool);# batch subscripts of all samples and QCpools | |
| 255 labion=dimnames(xb)[[2]][3] | |
| 256 newval=xb[[3]] # initialisation of corrected values = intial values | |
| 257 ind <- 0 # initialisation of correction indicator | |
| 258 normTodo=ok_norm(xb[indpb,3],xb[indpb,2], xb[indsp,3],xb[indsp,2],method="loess") | |
| 259 #cat("batch:",b," dim xb=",dim(xb)," ok=",normTodo,"\n") | |
| 260 if (normTodo==0) { | |
| 261 if(length(span)==0){span1<-1}else{span1<-span} | |
| 262 resloess=loess(xb[indpb,3]~xb[indpb,2],span=span1,degree=2,family="gaussian",iterations=4,surface="direct") # loess regression with QCpools | |
| 263 cor=predict(resloess,newdata=xb[,2]) | |
| 264 cor[cor<=1] <- 1 | |
| 265 newval=(vref*xb[,3]) / cor | |
| 266 if(length(which(newval>3*(quantile(newval)[4])))>0){ # in this case no modification of initial value | |
| 267 newval <- xb[,3]} else {ind <- 1} # confirmation of correction | |
| 268 if ((detail=="reg")&(ind==1)) { # plot | |
| 269 liminf=min(xb[indbt,3]);limsup=max(xb[indbt,3]) | |
| 270 plot(xb[indsp,2],xb[indsp,3],pch=16,main=paste(labion,"batch ",b),ylab="intensity",xlab="injection order",ylim=c(liminf,limsup)) | |
| 271 points(xb[indpb,2], xb[indpb,3],pch=5) | |
| 272 points(cbind(resloess$x,resloess$fitted)[order(resloess$x),],type="l",col="red") | |
| 273 } | |
| 274 } | |
| 275 if (ind==0) {# if ok_norm != 0 or if correction creates outliers, we perform a correction based on batch samples average | |
| 276 moySample=mean(xb[indsp,3]);if (moySample==0) moySample=1 | |
| 277 newval[indsp] = (vref*xb[indsp,3])/moySample | |
| 278 if(length(indpb)>0){ | |
| 279 moypool=mean(xb[indpb,3]) ; if (moypool==0) moypool=1 | |
| 280 newval[indpb] = (vref*xb[indpb,3])/moypool | |
| 281 } | |
| 282 } | |
| 283 newval <- list(norm.ion=newval,norm.ind=ind) | |
| 284 return(newval) | |
| 285 } | |
| 286 | |
| 287 | |
| 288 | |
| 289 norm_QCpool <- function (x, nbid, outlog, fact, metaion, detail="no", NormMoyPool=F, NormInt=F, method="linear",span="none",valNull="0") | |
| 290 { | |
| 291 ### Correction applying linear or lo(w)ess correction function on all ions for every batch of a dataframe. | |
| 292 # x: dataframe with ions in column and samples' metadata | |
| 293 # nbid: number of sample description columns (id and factors) with at least "batch", "injectionOrder", "sampleType" | |
| 294 # outlog: name of regression plots and PCA pdf file | |
| 295 # fact: factor to be used as categorical variable for plots | |
| 296 # metaion: dataframe of ions' metadata | |
| 297 # detail: level of detail in the outlog file. detail="no" ACP + boxplot of CV before and after correction. | |
| 298 # detail="plot" with plot for all batch before and after correction. | |
| 299 # detail="reg" with added plots with regression lines for all batches. | |
| 300 # NormMoyPool: not used | |
| 301 # NormInt: not used | |
| 302 # method: regression method to be used to correct : "linear" or "lowess" or "loess" | |
| 303 # valNull: to determine what to do with negatively estimated intensities | |
| 304 indfact =which(dimnames(x)[[2]]==fact) | |
| 305 indtypsamp=which(dimnames(x)[[2]]==args$sample_type_col_name) | |
| 306 indbatch =which(dimnames(x)[[2]]==args$batch_col_name) | |
| 307 indinject =which(dimnames(x)[[2]]==args$injection_order_col_name) | |
| 308 lastIon=dim(x)[2] | |
| 309 valref=apply(as.matrix(x[,(nbid+1):(lastIon)]),2,mean) # reference value for each ion used to still have the same rought size of values | |
| 310 nbi=lastIon-nbid # number of ions | |
| 311 nbb=length(levels(x[[args$batch_col_name]])) # Number of batch(es) = number of levels of factor "batch" (can be =1) | |
| 312 nbs=length(x[[args$sample_type_col_name]][x[[args$sample_type_col_name]]==args$sample_type_tags$sample])# Number of samples | |
| 313 nbp=length(x[[args$sample_type_col_name]][x[[args$sample_type_col_name]]==args$sample_type_tags$pool])# Number of QCpools | |
| 314 Xn=data.frame(x[,c(1:nbid)],matrix(0,nrow=nbp+nbs,ncol=nbi))# initialisation of the corrected dataframe (=initial dataframe) | |
| 315 dimnames(Xn)=dimnames(x) | |
| 316 cv=data.frame(matrix(0,nrow=nbi,ncol=2))# initialisation of dataframe containing CV before and after correction | |
| 317 dimnames(cv)[[2]]=c("avant","apres") | |
| 318 if (detail!="reg" && detail!="plot" && detail!="no") {detail="no"} | |
| 319 pdf(outlog,width=27,height=20) | |
| 320 cat(nbi," ions ",nbb," batch(es) \n") | |
| 321 if (detail=="plot") {if(nbb<6){par(mfrow=c(3,3),ask=F,cex=1.5)}else{par(mfrow=c(4,4),ask=F,cex=1.5)}} | |
| 322 res.ind <- matrix(NA,ncol=nbb,nrow=nbi,dimnames=list(dimnames(x)[[2]][-c(1:nbid)],paste("norm.b",1:nbb,sep=""))) | |
| 323 for (p in 1:nbi) {# for each ion | |
| 324 labion=dimnames(x)[[2]][p+nbid] | |
| 325 if (detail == "reg") {if(nbb<6){par(mfrow=c(3,3),ask=F,cex=1.5)}else{par(mfrow=c(4,4),ask=F,cex=1.5)}} | |
| 326 indpool=which(x[[args$sample_type_col_name]]==args$sample_type_tags$pool)# QCpools subscripts in all batches | |
| 327 pools1=x[indpool,p+nbid]; cv[p,1]=sd(pools1)/mean(pools1)# CV before correction | |
| 328 for (b in 1:nbb) {# for every batch | |
| 329 indpb = which(x[[args$batch_col_name]]==levels(x[[args$batch_col_name]])[b] & x[[args$sample_type_col_name]]==args$sample_type_tags$pool)# QCpools subscripts of the current batch | |
| 330 indsp = which(x[[args$batch_col_name]]==levels(x[[args$batch_col_name]])[b] & x[[args$sample_type_col_name]]==args$sample_type_tags$sample)# samples subscripts of the current batch | |
| 331 indbt = which(x[[args$batch_col_name]]==levels(x[[args$batch_col_name]])[b] & (x[[args$sample_type_col_name]]==args$sample_type_tags$pool | x[[args$sample_type_col_name]]==args$sample_type_tags$sample)) # subscripts of all samples | |
| 332 # cat(dimnames(x)[[2]][p+nbid]," indsp:",length(indsp)," indpb=",length(indpb)," indbt=",length(indbt)," ") | |
| 333 sub=data.frame(x[(x[[args$batch_col_name]]==levels(x[[args$batch_col_name]])[b]),c(indtypsamp,indinject,p+nbid)]) | |
| 334 if (method=="linear") { res.norm = normlinear(sub,detail,valref[p],b,valNull) | |
| 335 } else { if (method=="loess"){ res.norm <- normloess(sub,detail,valref[p],b,span) | |
| 336 } else { if (method=="lowess"){ res.norm <- normlowess(sub,detail,valref[p],b,span) | |
| 337 } else {stop("\n--\nNo valid 'method' argument supplied.\nMust be 'linear','loess' or 'lowess'.\n--\n")} | |
| 338 }} | |
| 339 Xn[indbt,p+nbid] = res.norm[[1]] | |
| 340 res.ind[p,b] <- res.norm[[2]] | |
| 341 # CV batch test : if after normaliszation, CV before < CV after initial values are kept | |
| 342 # moypoolRaw=mean(x[indpb,p+nbid]) ; if (moypoolRaw==0) moypoolRaw=1 | |
| 343 # moySampleRaw=mean(x[indsp,p+nbid]); if (moySampleRaw==0) moySampleRaw=1 | |
| 344 # moypool=mean(Xn[indpb,p+nbid]) ; if (moypool==0) moypool=1 | |
| 345 # #moySample=mean(Xn[indsp,p+nbid]); if (moySample==0) moySample=1 | |
| 346 # if (sd( Xn[indpb,p+nbid])/moypool>sd(x[indpb,p+nbid])/moypoolRaw) { | |
| 347 # Xn[indpb,p+nbid] = (valref[p]*x[indpb,p+nbid])/moypoolRaw | |
| 348 # Xn[indsp,p+nbid] = (valref[p]*x[indsp,p+nbid])/moySampleRaw | |
| 349 # } | |
| 350 } | |
| 351 Xn[indpool,p+nbid][Xn[indpool,p+nbid]<0] <- 0 | |
| 352 pools2=Xn[indpool,p+nbid]; cv[p,2]=sd(pools2,na.rm=TRUE)/mean(pools2,na.rm=TRUE)# CV apres correction | |
| 353 if (detail=="reg" || detail=="plot" ) { | |
| 354 # plot before and after correction | |
| 355 minval=min(cbind(x[p+nbid],Xn[p+nbid]),na.rm=TRUE);maxval=max(cbind(x[p+nbid],Xn[p+nbid]),na.rm=TRUE) | |
| 356 plot( x[[args$injection_order_col_name]], x[,p+nbid],col=x[[args$batch_col_name]],ylab=labion,ylim=c(minval,maxval), | |
| 357 main=paste0("before correction (CV for pools = ",round(cv[p,1],2),")")) | |
| 358 points(x[[args$injection_order_col_name]][indpool],x[indpool,p+nbid],col="maroon",pch=".",cex=2) | |
| 359 plot(Xn[[args$injection_order_col_name]],Xn[,p+nbid],col=x[[args$batch_col_name]],ylab="",ylim=c(minval,maxval), | |
| 360 main=paste0("after correction (CV for pools = ",round(cv[p,2],2),")")) | |
| 361 points(Xn[[args$injection_order_col_name]][indpool],Xn[indpool,p+nbid],col="maroon",pch=".",cex=2) | |
| 362 suppressWarnings(plot.design( x[c(indtypsamp,indbatch,indfact,p+nbid)],main="factors effect before correction")) | |
| 363 suppressWarnings(plot.design(Xn[c(indtypsamp,indbatch,indfact,p+nbid)],main="factors effect after correction")) | |
| 364 } | |
| 365 } | |
| 366 ### Replacement of post correction negative values by chosen value | |
| 367 Xnn=Xn | |
| 368 for (i in c((nbid+1):dim(Xn)[2])) { | |
| 369 cneg=which(Xn[[i]]<0) | |
| 370 Xnn[[i]]=replace(Xn[[i]],cneg,as.numeric(valNull)) | |
| 371 } | |
| 372 Xn=Xnn | |
| 373 | |
| 374 if (detail=="reg" || detail=="plot" || detail=="no") { | |
| 375 if (nbi > 3) { | |
| 376 par(mfrow=c(3,4),ask=F,cex=1.2) # PCA Plot before/after, normed only and ions plot | |
| 377 acplight(x[,c(indtypsamp,indbatch,indtypsamp,indfact,(nbid+1):lastIon)],"uv",TRUE) | |
| 378 norm.ion <- which(colnames(Xn)%in%(rownames(res.ind)[which(rowSums(res.ind)>=1)])) | |
| 379 acplight(Xn[,c(indtypsamp,indbatch,indtypsamp,indfact,(nbid+1):lastIon)],"uv",TRUE,norm.ion) | |
| 380 if(length(norm.ion)>0){acplight(Xn[,c(indtypsamp,indbatch,indtypsamp,indfact,norm.ion)],"uv",TRUE)} | |
| 381 par(mfrow=c(1,2),ask=F,cex=1.2) # Before/after boxplot | |
| 382 cvplot=cv[!is.na(cv[[1]])&!is.na(cv[[2]]),] | |
| 383 if(nrow(cvplot)>0){ | |
| 384 boxplot(cvplot[[1]],ylim=c(min(cvplot),max(cvplot)),main="CV before correction") | |
| 385 boxplot(cvplot[[2]],ylim=c(min(cvplot),max(cvplot)),main="CV after correction") | |
| 386 } | |
| 387 dev.off() | |
| 388 } | |
| 389 } | |
| 390 if (nbi<=3) {dev.off()} | |
| 391 # transposed matrix is return (format of the initial matrix with ions in rows) | |
| 392 Xr=Xn[,-c(1:nbid)]; dimnames(Xr)[[1]]=Xn[[1]] | |
| 393 Xr=t(Xr) ; Xr <- data.frame(ions=rownames(Xr),Xr) | |
| 394 | |
| 395 res.norm[[1]] <- Xr ; res.norm[[2]] <- data.frame(metaion,res.ind) ; res.norm[[3]] <- x[,c(1:nbid)] | |
| 396 names(res.norm) <- c("dataMatrix","variableMetadata","sampleMetadata") | |
| 397 return(res.norm) | |
| 398 } | |
| 399 | |
| 400 | |
| 401 | |
| 402 | |
| 403 | |
| 404 acplight <- function(ids, scaling="uv", indiv=FALSE,indcol=NULL) { | |
| 405 suppressPackageStartupMessages(library(ade4)) | |
| 406 suppressPackageStartupMessages(library(pcaMethods)) | |
| 407 # Make a PCA and plot scores and loadings. | |
| 408 # First column must contain samples' identifiers. | |
| 409 # Columns 2 to 4 contain factors to colour the plots. | |
| 410 for (i in 1:3) { | |
| 411 idss=ids[which(ids[,i+1]!="NA"),] | |
| 412 idss=data.frame(idss[idss[,i+1]!="",]) | |
| 413 classe=as.factor(idss[[i+1]]) | |
| 414 idsample=as.character(idss[[1]]) | |
| 415 colour=1:length(levels(classe)) | |
| 416 ions=as.matrix(idss[,5:dim(idss)[2]]) | |
| 417 # Removing ions containing NA (not compatible with standard PCA) | |
| 418 ions=t(na.omit(t(ions))) | |
| 419 if(i==1){if(ncol(ions)!=(ncol(idss)-4)){cat("Note:",(ncol(idss)-4)-ncol(ions),"ions were ignored for PCA display due to NA in intensities.\n")}} | |
| 420 # Scaling choice: "uv","none","pareto" | |
| 421 object=suppressWarnings(prep(ions, scale=scaling, center=TRUE)) | |
| 422 if(i==1){if(length(which(apply(ions,2,var)==0))>0){cat("Warning: there are",length(which(apply(ions,2,var)==0)),"constant ions.\n")}} | |
| 423 # ALGO: nipals,svdImpute, Bayesian, svd, probalistic=F | |
| 424 result <- pca(object, center=F, method="svd", nPcs=2) | |
| 425 # ADE4 : to plot samples' ellipsoid for each class | |
| 426 s.class(result@scores, classe, cpoint = 1,xax=1,yax=2,col=colour,sub=sprintf("Scores - PCs %sx%s",1,2), possub="bottomright") | |
| 427 #s.label(result@loadings,label = ions, cpoint = 0, clabel=0.4, xax=1,yax=2,sub="Loadings",possub="bottomright") | |
| 428 if(i==1){resulti <- result} | |
| 429 } | |
| 430 if(indiv) { | |
| 431 colour <- rep("darkblue",length(resulti@loadings)) ; if(!is.null(indcol)) {colour[-c(indcol)] <- "red"} | |
| 432 plot(resulti@loadings,col=colour,main="Loadings",xaxt="n",yaxt="n",pch=20, | |
| 433 xlab=bquote(PC1-R^2==.(resulti@R2[1])),ylab=bquote(PC2 - R^2 == .(resulti@R2[2]))) | |
| 434 abline(h=0,v=0)} | |
| 435 } | |
| 436 | |
| 437 | 
