Mercurial > repos > ethevenot > batchcorrection
diff batch_correction_wrapper.R @ 3:2e3a23dd6c24 draft default tip
Uploaded
author | melpetera |
---|---|
date | Thu, 28 Feb 2019 05:12:34 -0500 |
parents | 57edfd3943ab |
children |
line wrap: on
line diff
#!/usr/bin/Rscript --vanilla --slave --no-site-file

################################################################################################
# batch_correction_wrapper                                                                     #
#                                                                                              #
# Author: Marion LANDI / Jean-Francois MARTIN / Melanie Petera                                 #
# User: Galaxy                                                                                 #
# Original data: --                                                                            #
# Starting date: 22-07-2014                                                                    #
# Version 1: 22-07-2014                                                                        #
# Version 2: 08-12-2014                                                                        #
# Version 2.1: 09-01-2015 modification in Error message of sample matching                     #
# Version 2.2: 16-03-2015 inclusion of miniTools' functions for special characters             #
# Version 2.90: 18-08-2015 new parameter valnull                                               #
# Version 2.91: 25-08-2016 error message improvement                                           #
#                                                                                              #
#                                                                                              #
# Input files: dataMatrix.txt ; sampleMetadata.txt ; variableMetadata.txt (for DBC)            #
# Output files: graph_output.pdf ; corrected table ; diagnostic table                          #
#                                                                                              #
################################################################################################
# Reconstructed from the removal diff of a/batch_correction_wrapper.R (@@ -1,131 +0,0 @@).


library(batch) # necessary for parseCommandArgs function
# Interpretation of the arguments given on the command line as an R list of objects
args <- parseCommandArgs(evaluate = FALSE)

# Source R files whose paths are given relative to the directory of the running script.
#   ...: character strings, each a path relative to the script directory.
# The script directory is derived from the "--file=" entry of commandArgs().
source_local <- function(...) {
  argv <- commandArgs(trailingOnly = FALSE)
  # Strip the 7-character "--file=" prefix to get the script path
  base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
  # Iterate over the arguments directly: safe even when no file is given
  for (rel_path in list(...)) {
    source(paste(base_dir, rel_path, sep = "/"))
  }
}
# Import the different functions
source_local("Normalisation_QCpool.r", "easyrlibrary-lib/RcheckLibrary.R", "easyrlibrary-lib/miniTools.R")


## Reading of input files
idsample <- read.table(args$sampleMetadata, header = TRUE, sep = "\t", check.names = FALSE)
iddata <- read.table(args$dataMatrix, header = TRUE, sep = "\t", check.names = FALSE)

### Table match check
# match2() and check.err() come from the sourced helper files; a result longer
# than 1 is treated as a list of error messages to report.
table.check <- match2(iddata, idsample, "sample")
if (length(table.check) > 1) {
  check.err(table.check)
}

### StockID
# stockID() comes from the sourced helper files; its id.match element is passed
# to reproduceID() when the corrected data matrix is written.
samp.id <- stockID(iddata, idsample, "sample")
iddata <- samp.id$dataMatrix
idsample <- samp.id$Metadata
samp.id <- samp.id$id.match

### Checking mandatory variables
mand.check <- ""
for (mandcol in c("sampleType", "injectionOrder", "batch")) {
  if (!(mandcol %in% colnames(idsample))) {
    mand.check <- c(mand.check, "\nError: no '", mandcol, "' column in sample metadata.\n",
                    "Note: table must include this exact column name (it is case-sensitive).\n")
  }
}
# mand.check starts as a single empty string, so length > 1 means at least one error
if (length(mand.check) > 1) {
  mand.check <- c(mand.check, "\nFor more information, see the help section or:",
                  "\n http://workflow4metabolomics.org/sites/",
                  "workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n")
  check.err(mand.check)
}

### Formating
idsample[[1]] <- make.names(idsample[[1]])
dimnames(iddata)[[1]] <- iddata[[1]]

### Transposition of ions data
# Drop the first (identifier) column; drop = FALSE keeps a data frame even when
# a single data column remains.
idTdata <- t(iddata[, -1, drop = FALSE])
idTdata <- data.frame(dimnames(idTdata)[[1]], idTdata)

### Merge of 2 files (ok even if the two dataframe are not sorted on the same key)
id <- merge(idsample, idTdata, by.x = 1, by.y = 1)

id$batch <- as.factor(id$batch)
# %in% never yields NA, so rows with a missing sampleType are simply excluded
ids <- id[id$sampleType %in% c("pool", "sample"), ]
nbid <- dim(idsample)[2]

### Checking the number of sample and pool

# least 2 samples
if (length(which(ids$sampleType == "sample")) < 2) {
  table.check <- c(table.check, "\nError: less than 2 samples specified in sample metadata.",
                   "\nMake sure this is not due to errors in sampleType coding.\n")
}

# least 2 pools per batch for all batchs
# Count the pools of every batch level (the former loop only visited the last one).
B <- vapply(
  levels(ids$batch),
  function(batch_level) {
    length(which(ids$batch == batch_level & ids$sampleType == "pool"))
  },
  integer(1)
)
# Report an error when there is no batch at all or any batch has fewer than 2 pools
if (length(B) == 0 || any(B < 2)) {
  table.check <- c(table.check, "\nError: less than 2 pools specified in each batch in sample metadata.",
                   "\nMake sure this is not due to errors in sampleType coding.\n")
}

### Factor of interest
factbio <- args$ref_factor


if (args$analyse == "batch_correction") {
  ## Reading of Metadata Ions file
  metaion <- read.table(args$variableMetadata, header = TRUE, sep = "\t", check.names = FALSE)
  ## Table match check
  table.check <- c(table.check, match2(iddata, metaion, "variable"))
  check.err(table.check)

  ## variables
  detail <- args$detail
  method <- args$method

  ## outputs
  outlog <- args$graph_output

  ## Launch
  # norm_QCpool() is defined in the sourced Normalisation_QCpool.r; elements 1-3 of
  # its result are used below.
  res <- norm_QCpool(ids, nbid, outlog, factbio, metaion, detail, FALSE, FALSE, method, args$span, args$valnull)
  save(res, file = args$rdata_output)
  write.table(reproduceID(res[[1]], res[[3]], "sample", samp.id)$dataMatrix,
              file = args$dataMatrix_out, sep = "\t", row.names = FALSE, quote = FALSE)
  write.table(res[[2]], file = args$variableMetadata_out, sep = "\t", row.names = FALSE, quote = FALSE)
} else {
  ## error check
  check.err(table.check)

  ## outputs
  out_graph_pdf <- args$out_graph_pdf
  out_preNormSummary <- args$out_preNormSummary

  ## Launch
  plotsituation(ids, nbid, out_graph_pdf, out_preNormSummary, factbio, args$span)
}

rm(args)