Mercurial > repos > ethevenot > batchcorrection
diff BC/batch_correction_wrapper.R @ 3:2e3a23dd6c24 draft default tip
Uploaded
author | melpetera |
---|---|
date | Thu, 28 Feb 2019 05:12:34 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env Rscript

################################################################################################
# batch_correction_wrapper                                                                     #
#                                                                                              #
# Author: Marion LANDI / Jean-Francois MARTIN / Melanie Petera                                 #
# User: Galaxy                                                                                 #
# Original data: --                                                                            #
# Starting date: 22-07-2014                                                                    #
# Version 1: 22-07-2014                                                                        #
# Version 2: 08-12-2014                                                                        #
# Version 2.1: 09-01-2015 modification in Error message of sample matching                     #
# Version 2.2: 16-03-2015 inclusion of miniTools' functions for special characters             #
# Version 2.90: 18-08-2015 new parameter valnull                                               #
# Version 2.91: 25-08-2016 error message improvement                                           #
#                                                                                              #
#                                                                                              #
# Input files: dataMatrix.txt ; sampleMetadata.txt ; variableMetadata.txt (for DBC)            #
# Output files: graph_output.pdf ; corrected table ; diagnostic table                          #
#                                                                                              #
################################################################################################

# Command-line wrapper: parses "name value" arguments, reads the data matrix and
# sample metadata, checks them, then either runs norm_QCpool() (when `analyse`
# is "batch_correction") or plotsituation() (any other value of `analyse`).
#
# NOTE(review): top-level variable names (in particular `args`) are kept as in
# the original script because the sourced files may read them as globals --
# confirm before renaming anything.

library(batch) # necessary for parseCommandArgs function

##------------------------------
## test help option
##------------------------------

# Prog. constants
argv.help <- commandArgs(trailingOnly = FALSE)
script.path <- sub("--file=", "", argv.help[grep("--file=", argv.help)])
prog.name <- basename(script.path)

# Test Help
# The flag is matched exactly: a substring search for "-h" would also fire on
# any argument or path that merely contains "-h" and make the script exit early.
if (any(argv.help %in% c("-h", "--help"))) {
  cat("Usage: Rscript ",
      prog.name,
      "{args} \n",
      "parameters: \n",
      "\tanalyse {val}: must be set to \"batch_correction\" \n",
      "\tdataMatrix {file}: set the input data matrix file (mandatory) \n",
      "\tsampleMetadata {file}: set the input sample metadata file (mandatory) \n",
      "\tvariableMetadata {file}: set the input variable metadata file (mandatory) \n",
      "\tmethod {opt}: set the method; can set to \"linear\", \"lowess\" or \"loess\" (mandatory) \n",
      "\tspan {condition}: set the span condition; set to \"none\" if method is set to \"linear\" (mandatory) \n",
      "\tref_factor {value}: set the ref_factor value; (if span value is set to NULL, optional) \n",
      "\tdetail {value}: set the detail value; (if span value is set to NULL, optional) \n",
      "\tdataMatrix_out {file}: set the output data matrix file (mandatory) \n",
      "\tvariableMetadata_out {file}: set the output variable metadata file (mandatory) \n",
      "\tgraph_output {file}: set the output graph file (mandatory) \n",
      "\trdata_output {file}: set the output Rdata file (mandatory) \n",
      "\tbatch_col_name {val}: the column name for batch. Default value is \"batch\".\n",
      "\tinjection_order_col_name {val}: the column name for the injection order. Default value is \"injectionOrder\".\n",
      "\tsample_type_col_name {val}: the column name for the sample types. Default value is \"sampleType\".\n",
      "\tsample_type_tags {val}: the tags used inside the sample type column, defined as key/value pairs separated by commas (example: blank=blank,pool=pool,sample=sample).\n",
      "\n")
  quit(status = 0)
}

##------------------------------
## init. params
##------------------------------

# interpretation of arguments given in command line as an R list of objects
args <- parseCommandArgs(evaluate = FALSE)

# Set default col names
if (!"batch_col_name" %in% names(args)) {
  args[["batch_col_name"]] <- "batch"
}
if (!"injection_order_col_name" %in% names(args)) {
  args[["injection_order_col_name"]] <- "injectionOrder"
}
if (!"sample_type_col_name" %in% names(args)) {
  args[["sample_type_col_name"]] <- "sampleType"
}
if (!"sample_type_tags" %in% names(args)) {
  args[["sample_type_tags"]] <- "blank=blank,pool=pool,sample=sample"
}

# Parse sample type tags ("key=value" pairs separated by commas) into a named list
sample.type.tags <- list()
for (kv in strsplit(strsplit(args$sample_type_tags, ",")[[1]], "=")) {
  # A pair without "=" has no value: fail with a readable message instead of
  # a bare "subscript out of bounds".
  if (length(kv) < 2) {
    stop("Malformed key/value pair in option sample_type_tags: '",
         paste(kv, collapse = "="), "' (expected key=value).")
  }
  sample.type.tags[[kv[[1]]]] <- kv[[2]]
}
if (!all(c("pool", "blank", "sample") %in% names(sample.type.tags))) {
  stop("All tags pool, blank and sample must be defined in option sample_type_tags.")
}
args$sample_type_tags <- sample.type.tags

##------------------------------
## init. functions
##------------------------------

# Source each given file, resolved relative to the directory of the running
# script (taken from the "--file=" entry of the command line).
source_local <- function(...) {
  argv <- commandArgs(trailingOnly = FALSE)
  base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
  for (src in list(...)) {
    source(paste(base_dir, src, sep = "/"))
  }
}
# Import the different functions
# (match2, check.err, stockID, reproduceID, norm_QCpool and plotsituation used
# below are not defined in this script; presumably they come from these files.)
source_local("Normalisation_QCpool.r", "easyrlibrary-lib/RcheckLibrary.R", "easyrlibrary-lib/miniTools.R")


## Reading of input files
idsample <- read.table(args$sampleMetadata, header = TRUE, sep = "\t", check.names = FALSE, comment.char = "")
iddata <- read.table(args$dataMatrix, header = TRUE, sep = "\t", check.names = FALSE, comment.char = "")

### Table match check
table.check <- match2(iddata, idsample, "sample")
if (length(table.check) > 1) {
  check.err(table.check)
}

### StockID
samp.id <- stockID(iddata, idsample, "sample")
iddata <- samp.id$dataMatrix
idsample <- samp.id$Metadata
samp.id <- samp.id$id.match

### Checking mandatory variables
mand.check <- ""
for (mandcol in c(args$sample_type_col_name, args$injection_order_col_name, args$batch_col_name)) {
  if (!(mandcol %in% colnames(idsample))) {
    mand.check <- c(mand.check, "\nError: no '", mandcol, "' column in sample metadata.\n",
                    "Note: table must include this exact column name (it is case-sensitive).\n")
  }
}
# mand.check starts with one empty element, so length > 1 means at least one error
if (length(mand.check) > 1) {
  mand.check <- c(mand.check, "\nFor more information, see the help section or:",
                  "\n http://workflow4metabolomics.org/sites/",
                  "workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n")
  check.err(mand.check)
}

### Formating
idsample[[1]] <- make.names(idsample[[1]])
dimnames(iddata)[[1]] <- iddata[[1]]

### Transposition of ions data
# drop = FALSE keeps a data frame (and its names) even with a single sample column
idTdata <- t(iddata[, 2:dim(iddata)[2], drop = FALSE])
idTdata <- data.frame(dimnames(idTdata)[[1]], idTdata)

### Merge of 2 files (ok even if the two dataframe are not sorted on the same key)
id <- merge(idsample, idTdata, by.x = 1, by.y = 1)

id[[args$batch_col_name]] <- as.factor(id[[args$batch_col_name]])
# Keep only the rows tagged as pool or sample
ids <- id[id[[args$sample_type_col_name]] == args$sample_type_tags$pool |
            id[[args$sample_type_col_name]] == args$sample_type_tags$sample, ]
nbid <- dim(idsample)[2]

### Checking the number of sample and pool

# least 2 samples
if (length(which(ids[[args$sample_type_col_name]] == args$sample_type_tags$sample)) < 2) {
  table.check <- c(table.check, "\nError: less than 2 samples specified in sample metadata.",
                   "\nMake sure this is not due to errors in sampleType coding.\n")
}

# least 2 pools per batch for all batchs
# Count the pools of every batch level. The loop must run over all level
# indices (seq_along); iterating over length(...) alone visits only the last one.
B <- rep(0, length(levels(ids[[args$batch_col_name]])))
for (nbB in seq_along(levels(ids[[args$batch_col_name]]))) {
  in.batch <- which(ids[[args$batch_col_name]] == levels(ids[[args$batch_col_name]])[nbB])
  B[nbB] <- length(which(ids[in.batch, ][[args$sample_type_col_name]] == args$sample_type_tags$pool))
}
# NOTE(review): this only fails when NO batch has 2 or more pools, whereas the
# comment above asks for 2 pools in every batch -- confirm the intended rule.
if (length(which(B > 1)) == 0) {
  table.check <- c(table.check, "\nError: less than 2 pools specified in each batch in sample metadata.",
                   "\nMake sure this is not due to errors in sampleType coding.\n")
}

### Factor of interest
factbio <- args$ref_factor


if (args$analyse == "batch_correction") {
  ## Reading of Metadata Ions file
  metaion <- read.table(args$variableMetadata, header = TRUE, sep = "\t", check.names = FALSE, comment.char = "")
  ## Table match check
  table.check <- c(table.check, match2(iddata, metaion, "variable"))
  check.err(table.check)

  ## variables
  detail <- args$detail
  method <- args$method

  ## outputs
  outlog <- args$graph_output

  ## Launch
  res <- norm_QCpool(ids, nbid, outlog, factbio, metaion, detail, FALSE, FALSE, method, args$span, args$valnull)
  save(res, file = args$rdata_output)
  write.table(reproduceID(res[[1]], res[[3]], "sample", samp.id)$dataMatrix,
              file = args$dataMatrix_out, sep = "\t", row.names = FALSE, quote = FALSE)
  write.table(res[[2]], file = args$variableMetadata_out, sep = "\t", row.names = FALSE, quote = FALSE)
} else {
  ## error check
  check.err(table.check)

  ## outputs
  out_graph_pdf <- args$out_graph_pdf
  out_preNormSummary <- args$out_preNormSummary

  ## Launch
  plotsituation(ids, nbid, out_graph_pdf, out_preNormSummary, factbio, args$span)
}

rm(args)