camera_annotate: lib.r comparison

comparison lib.r @ 25:230f0bc9e792 draft

planemo upload commit a634879c0e651eb0eb7b435a107ee40cf30524fa

author	lecorguille
date	Fri, 10 Feb 2017 11:11:27 -0500
parents	2a4a811c663d
children	f769635d55f9

comparison

equal deleted inserted replaced

-:10176a940ec6
+:230f0bc9e792
 # lib.r version="2.2.1"
+#@author G. Le Corguille
 #This function creates PDF files from the PNG images generated by diffreport
 diffreport_png2pdf <- function(filebase, new_file_path) {
 pdfEicOutput = paste(new_file_path,filebase,"-eic_visible_pdf",sep="")
 pdfBoxOutput = paste(new_file_path,filebase,"-box_visible_pdf",sep="")
 system(paste("gm convert ",filebase,"_eic/*.png ",filebase,"_eic.pdf",sep=""))
 system(paste("gm convert ",filebase,"_box/*.png ",filebase,"_box.pdf",sep=""))
 file.copy(paste(filebase,"_eic.pdf",sep=""), pdfEicOutput)
 file.copy(paste(filebase,"_box.pdf",sep=""), pdfBoxOutput)
+}
+#@author G. Le Corguille
+#This function converts the retention times (rt, rtmin, rtmax) from seconds to minutes when convertRTMinute is TRUE
+RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
+if (convertRTMinute){
+#converting the retention times (seconds) into minutes
+print("converting the retention times into minutes in the variableMetadata")
+variableMetadata[,"rt"]=variableMetadata[,"rt"]/60
+variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60
+variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60
+}
+return (variableMetadata)
+}
+#@author G. Le Corguille
+#This function formats the ion identifiers: it adds a unique "namecustom" column built from the rounded mz and rt values
+formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) {
+splitDeco = strsplit(as.character(variableMetadata$name),"_")
+idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) })
+namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco))
+variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
+return(variableMetadata)
 }
 #The function annotateDiffreport without the corr function which bugs
 annotatediff <- function(xset=xset, listArguments=listArguments, variableMetadataOutput="variableMetadata.tsv", dataMatrixOutput="dataMatrix.tsv",new_file_path=NULL) {
 # Resolve the bug with x11, with the function png
 options(bitmapType='cairo')
 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
 res=try(is.null(xset@filled))
 # ------ annot -------
 listArguments[["calcCiS"]]=as.logical(listArguments[["calcCiS"]])
 listArguments[["calcIso"]]=as.logical(listArguments[["calcIso"]])
 listArguments[["calcCaS"]]=as.logical(listArguments[["calcCaS"]])
-#graphMethod parameter bugs where this parameter is not defined in quick=true
+# common parameters
-if(listArguments[["quick"]]==TRUE) {
+listArguments4annotate = list(object=xset,
-xa= annotate(object=xset,nSlaves=1,sigma=listArguments[["sigma"]],perfwhm=listArguments[["perfwhm"]],maxcharge=listArguments[["maxcharge"]],maxiso=listArguments[["maxiso"]],minfrac=listArguments[["minfrac"]],ppm=listArguments[["ppm"]],mzabs=listArguments[["mzabs"]],quick=listArguments[["quick"]],polarity=listArguments[["polarity"]],max_peaks=listArguments[["max_peaks"]],intval=listArguments[["intval"]])
+nSlaves=listArguments[["nSlaves"]],sigma=listArguments[["sigma"]],perfwhm=listArguments[["perfwhm"]],
-}
+maxcharge=listArguments[["maxcharge"]],maxiso=listArguments[["maxiso"]],minfrac=listArguments[["minfrac"]],
-else {
+ppm=listArguments[["ppm"]],mzabs=listArguments[["mzabs"]],quick=listArguments[["quick"]],
-xa= annotate(object=xset,nSlaves=1,sigma=listArguments[["sigma"]],perfwhm=listArguments[["perfwhm"]],graphMethod=listArguments[["graphMethod"]],cor_eic_th=listArguments[["cor_eic_th"]],pval=listArguments[["pval"]],calcCiS=listArguments[["calcCiS"]],calcIso=listArguments[["calcIso"]],calcCaS=listArguments[["calcCaS"]],multiplier=listArguments[["multiplier"]],maxcharge=listArguments[["maxcharge"]],maxiso=listArguments[["maxiso"]],minfrac=listArguments[["minfrac"]],ppm=listArguments[["ppm"]],mzabs=listArguments[["mzabs"]],quick=listArguments[["quick"]],polarity=listArguments[["polarity"]],max_peaks=listArguments[["max_peaks"]],intval=listArguments[["intval"]])
+polarity=listArguments[["polarity"]],max_peaks=listArguments[["max_peaks"]],intval=listArguments[["intval"]])
-}
+# quick == FALSE
-peakList=getPeaklist(xa,intval=listArguments[["intval"]])
+if(listArguments[["quick"]]==FALSE) {
-peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
+listArguments4annotate = append(listArguments4annotate,
+list(graphMethod=listArguments[["graphMethod"]],cor_eic_th=listArguments[["cor_eic_th"]],pval=listArguments[["pval"]],
+calcCiS=listArguments[["calcCiS"]],calcIso=listArguments[["calcIso"]],calcCaS=listArguments[["calcCaS"]]))
-# --- Multi condition : diffreport ---
+# no ruleset
-diffrep=NULL
+if (!is.null(listArguments[["multiplier"]])) {
-if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) {
+listArguments4annotate = append(listArguments4annotate,
+list(multiplier=listArguments[["multiplier"]]))
+}
+# ruleset
+else {
+rulset=read.table(listArguments[["rules"]], h=T, sep=";")
+if (ncol(rulset) < 4) rulset=read.table(listArguments[["rules"]], h=T, sep="\t")
+if (ncol(rulset) < 4) rulset=read.table(listArguments[["rules"]], h=T, sep=",")
+if (ncol(rulset) < 4) {
+error_message="Your ruleset file seems not well formatted. The column separators accepted are ; , and tabulation"
+print(error_message)
+stop(error_message)
+}
+listArguments4annotate = append(listArguments4annotate,
+list(rules=rulset))
+}
+}
+# launch annotate
+xa = do.call("annotate", listArguments4annotate)
+peakList=getPeaklist(xa,intval=listArguments[["intval"]])
+peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
+# --- dataMatrix ---
+dataMatrix = peakList[,(make.names(colnames(peakList)) %in% c("name", make.names(sampnames(xa@xcmsSet))))]
+write.table(dataMatrix, sep="\t", quote=FALSE, row.names=FALSE, file=dataMatrixOutput)
+# --- Multi condition : diffreport ---
+diffrep=NULL
+if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) {
 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
 res=try(is.null(xset@filled))
 classes=levels(sampclass(xset))
 x=1:(length(classes)-1)
 for (i in seq(along=x) ) {
 y=1:(length(classes))
 for (n in seq(along=y)){
 if(i+n <= length(classes)){
 filebase=paste(classes[i],class2=classes[i+n],sep="-vs-")
 diffrep=diffreport(object=xset,class1=classes[i],class2=classes[i+n],filebase=filebase,eicmax=listArguments[["eicmax"]],eicwidth=listArguments[["eicwidth"]],sortpval=TRUE,value=listArguments[["value"]],h=listArguments[["h"]],w=listArguments[["w"]],mzdec=listArguments[["mzdec"]])
-#combines results
-diffreportTSV=merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F)
+diffrepOri = diffrep
-diffreportTSV=cbind(diffreportTSV[,!(colnames(diffreportTSV) %in% c(sampnames(xa@xcmsSet)))],diffreportTSV[,(colnames(diffreportTSV) %in% c(sampnames(xa@xcmsSet)))])
+# renaming of the columns rtmed and mzmed to rt and mz to fit with the CAMERA peaklist function output
-if(listArguments[["sortpval"]]){
+colnames(diffrep)[colnames(diffrep)=="rtmed"] <- "rt"
-diffreportTSV=diffreportTSV[order(diffreportTSV$pvalue), ]
+colnames(diffrep)[colnames(diffrep)=="mzmed"] <- "mz"
-}
+# combines results and reorder columns
-if (listArguments[["convert_param"]]){
+diffrep = merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F)
-#converting the retention times (seconds) into minutes
+diffrep = cbind(diffrep[,!(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))],diffrep[,(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))])
-diffreportTSV$rt=diffreportTSV$rt/60;diffreportTSV$rtmin=diffreportTSV$rtmin/60; diffreportTSV$rtmax=diffreportTSV$rtmax/60;
-}
+diffrep = RTSecondToMinute(diffrep, listArguments[["convertRTMinute"]])
-write.table(diffreportTSV, sep="\t", quote=FALSE, row.names=FALSE, file=paste(new_file_path,filebase,"-tabular_visible_tabular",sep=""))
+diffrep = formatIonIdentifiers(diffrep, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]])
-if (listArguments[["eicmax"]] != 0) {
+if(listArguments[["sortpval"]]){
-diffreport_png2pdf(filebase, new_file_path)
+diffrep=diffrep[order(diffrep$pvalue), ]
 }
-}
-}
+write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste(new_file_path,filebase,"-tabular_visible_tabular",sep=""))
-}
-}
+if (listArguments[["eicmax"]] != 0) {
+diffreport_png2pdf(filebase, new_file_path)
+}
+}
+}
-# --- variableMetadata ---
+}
-variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
+}
-# if we have 2 conditions, we keep stat of diffrep
-if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))==2) {
-variableMetadata = merge(variableMetadata, diffrep[,c("name","fold","tstat","pvalue")],by.x="name", by.y="name", sort=F)
+# --- variableMetadata ---
-if(exists("listArguments[[\"sortpval\"]]")){
+variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
-variableMetadata=variableMetadata[order(variableMetadata$pvalue), ]
+variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]])
-}
+variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]])
-}
+# if we have 2 conditions, we keep stat of diffrep
+if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))==2) {
-variableMetadataOri=variableMetadata
+variableMetadata = merge(variableMetadata, diffrep[,c("name","fold","tstat","pvalue")],by.x="name", by.y="name", sort=F)
-if (listArguments[["convert_param"]]){
+if(exists("listArguments[[\"sortpval\"]]")){
-#converting the retention times (seconds) into minutes
+variableMetadata=variableMetadata[order(variableMetadata$pvalue), ]
-print("converting the retention times into minutes in the variableMetadata")
+}
-variableMetadata$rt=variableMetadata$rt/60;variableMetadata$rtmin=variableMetadata$rtmin/60; variableMetadata$rtmax=variableMetadata$rtmax/60;
+}
-}
-#Transform metabolites name
+variableMetadataOri=variableMetadata
-variableMetadata$name= paste("M",round(variableMetadata$mz,digits=listArguments[["num_digits"]]),"T",round(variableMetadata$rt),sep="")
+write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
-write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
+return(list("xa"=xa,"diffrep"=diffrepOri,"variableMetadata"=variableMetadataOri));
-# --- dataMatrix ---
-dataMatrix = peakList[,(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
+}
-dataMatrix=cbind(peakList$name,dataMatrix); colnames(dataMatrix)[1] = c("name");
-if (listArguments[["convert_param"]]){
+combinexsAnnos_function <- function(xaP, xaN, listOFlistArgumentsP,listOFlistArgumentsN, diffrepP=NULL,diffrepN=NULL,pos=TRUE,tol=2,ruleset=NULL,keep_meta=TRUE, intval="into", convertRTMinute=F, numDigitsMZ=0, numDigitsRT=0, variableMetadataOutput="variableMetadata.tsv"){
-#converting the retention times (seconds) into minutes
-print("converting the retention times into minutes in the dataMatrix ids")
+#Load the two Rdata to extract the xset objects from positive and negative mode
-peakList$rt=peakList$rt/60
+cat("\tObject xset from positive mode\n")
-}
+print(xaP)
-dataMatrix$name= paste("M",round(peakList$mz,digits=listArguments[["num_digits"]]),"T",round(peakList$rt),sep="")
+cat("\n")
-write.table(dataMatrix, sep="\t", quote=FALSE, row.names=FALSE, file=dataMatrixOutput)
+cat("\tObject xset from negative mode\n")
-return(list("xa"=xa,"diffrep"=diffrep,"variableMetadata"=variableMetadataOri));
+print(xaN)
+cat("\n")
-}
+cat("\n")
+cat("\tCombining...\n")
-combinexsAnnos_function <- function(xaP, xaN, listOFlistArgumentsP,listOFlistArgumentsN, diffrepP=NULL,diffrepN=NULL,convert_param=FALSE,pos=TRUE,tol=2,ruleset=NULL,keep_meta=TRUE, variableMetadataOutput="variableMetadata.tsv"){
+#Convert the string to numeric for creating matrix
+row=as.numeric(strsplit(ruleset,",")[[1]][1])
-#Load the two Rdata to extract the xset objects from positive and negative mode
+column=as.numeric(strsplit(ruleset,",")[[1]][2])
-cat("\tObject xset from positive mode\n")
+ruleset=cbind(row,column)
-print(xaP)
+#Test if the file comes from an older version tool
-cat("\n")
+if ((!is.null(xaP)) & (!is.null(xaN))) {
+#Launch the combinexsannos function from CAMERA
-cat("\tObject xset from negative mode\n")
+cAnnot=combinexsAnnos(xaP, xaN,pos=pos,tol=tol,ruleset=ruleset)
-print(xaN)
+} else {
-cat("\n")
+stop("You must relauch the CAMERA.annotate step with the lastest version.")
+}
-cat("\n")
-cat("\tCombining...\n")
+if(pos){
-#Convert the string to numeric for creating matrix
+xa=xaP
-row=as.numeric(strsplit(ruleset,",")[[1]][1])
+listOFlistArgumentsP=listOFlistArguments
-column=as.numeric(strsplit(ruleset,",")[[1]][2])
+mode="neg. Mode"
-ruleset=cbind(row,column)
+} else {
-#Test if the file comes from an older version tool
+xa=xaN
-if ((!is.null(xaP)) & (!is.null(xaN))) {
+listOFlistArgumentsN=listOFlistArguments
-#Launch the combinexsannos function from CAMERA
+mode="pos. Mode"
-cAnnot=combinexsAnnos(xaP, xaN,pos=pos,tol=tol,ruleset=ruleset)
+}
-} else {
-stop("You must relauch the CAMERA.annotate step with the lastest version.")
+peakList=getPeaklist(xa,intval=intval)
-}
+peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
+variableMetadata=cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup",mode)]);
+variableMetadata=variableMetadata[,!(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))]
-if(pos){
+#Test if there are exactly two classes (conditions) and if both diffreports (positive and negative) are available
-xa=xaP
+if ( nlevels(sampclass(xaP@xcmsSet))==2 & (!is.null(diffrepN)) & (!is.null(diffrepP))) {
-listOFlistArgumentsP=listOFlistArguments
+diffrepP = diffrepP[,c("name","fold","tstat","pvalue")]; colnames(diffrepP) = paste("P.",colnames(diffrepP),sep="")
-mode="neg. Mode"
+diffrepN = diffrepN[,c("name","fold","tstat","pvalue")]; colnames(diffrepN) = paste("N.",colnames(diffrepN),sep="")
-} else {
-xa=xaN
+variableMetadata = merge(variableMetadata, diffrepP, by.x="name", by.y="P.name")
-listOFlistArgumentsN=listOFlistArguments
+variableMetadata = merge(variableMetadata, diffrepN, by.x="name", by.y="N.name")
-mode="pos. Mode"
+}
-}
-intval = "into"; for (steps in names(listOFlistArguments)) { if (!is.null(listOFlistArguments[[steps]]$intval)) intval = listOFlistArguments[[steps]]$intval }
+rownames(variableMetadata) = NULL
-peakList=getPeaklist(xa,intval=intval)
+#TODO: checker
-peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
+#colnames(variableMetadata)[1:2] = c("name","mz/rt");
-variableMetadata=cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup",mode)]);
-variableMetadata=variableMetadata[,!(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))]
+variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute)
+variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
-#Test if there are more than two classes (conditions)
-if ( nlevels(sampclass(xaP@xcmsSet))==2 & (!is.null(diffrepN)) & (!is.null(diffrepP))) {
+#If the user wants to keep only the metabolites which match a difference (rows whose "mode" column is not empty)
-diffrepP = diffrepP[,c("name","fold","tstat","pvalue")]; colnames(diffrepP) = paste("P.",colnames(diffrepP),sep="")
+if(keep_meta){
-diffrepN = diffrepN[,c("name","fold","tstat","pvalue")]; colnames(diffrepN) = paste("N.",colnames(diffrepN),sep="")
+variableMetadata=variableMetadata[variableMetadata[,c(mode)]!="",]
+}
-variableMetadata = merge(variableMetadata, diffrepP, by.x="name", by.y="P.name")
-variableMetadata = merge(variableMetadata, diffrepN, by.x="name", by.y="N.name")
+#Write the output into a tsv file
-}
+write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
+return(variableMetadata);
-rownames(variableMetadata) = NULL
-#TODO: checker
+}
-#colnames(variableMetadata)[1:2] = c("name","mz/rt");
-#If the user want to convert the retention times (seconds) into minutes.
-if (listArguments[["convert_param"]]){
-#converting the retention times (seconds) into minutes
-cat("\tConverting the retention times into minutes\n")
-variableMetadata$rtmed=cAnnot$rt/60; variableMetadata$rtmin=cAnnot$rtmin/60; variableMetadata$rtmax=cAnnot$rtmax/60;
-}
-#If the user want to keep only the metabolites which match a difference
-if(keep_meta){
-variableMetadata=variableMetadata[variableMetadata[,c(mode)]!="",]
-}
-#Write the output into a tsv file
-write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
-return(variableMetadata);
-}

Mercurial > repos > lecorguille > camera_annotate

comparison lib.r @ 25:230f0bc9e792 draft