Mercurial > repos > ethevenot > batchcorrection
comparison BC/batch_correction_wrapper.R @ 3:2e3a23dd6c24 draft default tip
Uploaded
| author | melpetera |
|---|---|
| date | Thu, 28 Feb 2019 05:12:34 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:57edfd3943ab | 3:2e3a23dd6c24 |
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 | |
| 3 ################################################################################################ | |
| 4 # batch_correction_wrapper # | |
| 5 # # | |
| 6 # Author: Marion LANDI / Jean-Francois MARTIN / Melanie Petera # | |
| 7 # User: Galaxy # | |
| 8 # Original data: -- # | |
| 9 # Starting date: 22-07-2014 # | |
| 10 # Version 1: 22-07-2014 # | |
| 11 # Version 2: 08-12-2014 # | |
| 12 # Version 2.1: 09-01-2015 modification in Error message of sample matching # | |
| 13 # Version 2.2: 16-03-2015 inclusion of miniTools' functions for special characters # | |
| 14 # Version 2.90: 18-08-2015 new parameter valnull # | |
| 15 # Version 2.91: 25-08-2016 error message improvement # | |
| 16 # # | |
| 17 # # | |
| 18 # Input files: dataMatrix.txt ; sampleMetadata.txt ; variableMetadata.txt (for DBC) # | |
| 19 # Output files: graph_output.pdf ; corrected table ; diagnostic table # | |
| 20 # # | |
| 21 ################################################################################################ | |
| 22 | |
| 23 | |
| 24 library(batch) #necessary for parseCommandArgs function | |
| 25 | |
| 26 ##------------------------------ | |
| 27 ## test help option | |
| 28 ##------------------------------ | |
| 29 | |
| 30 # Prog. constants | |
argv.help <- commandArgs(trailingOnly = FALSE)
script.path <- sub("--file=", "", argv.help[grep("--file=", argv.help)])
prog.name <- basename(script.path)

# Tell whether the command line asks for the usage text.
#
# argv: character vector of command-line tokens (as given by commandArgs()).
# Returns TRUE only when one token IS a help flag. A substring search for
# "-h" would also fire on ordinary values that merely contain "-h", such as
# a file path like "/data/my-hplc/matrix.tsv", and the script would then
# print the usage and exit instead of running.
is_help_requested <- function(argv) {
  any(argv %in% c("-h", "-help", "--help"))
}

# Test Help
if (is_help_requested(argv.help)) {
  cat("Usage: Rscript ",
      prog.name,
      "{args} \n",
      "parameters: \n",
      "\tanalyse {val}: must be set to \"batch_correction\" \n",
      "\tdataMatrix {file}: set the input data matrix file (mandatory) \n",
      "\tsampleMetadata {file}: set the input sample metadata file (mandatory) \n",
      "\tvariableMetadata {file}: set the input variable metadata file (mandatory) \n",
      "\tmethod {opt}: set the method; can set to \"linear\", \"lowess\" or \"loess\" (mandatory) \n",
      "\tspan {condition}: set the span condition; set to \"none\" if method is set to \"linear\" (mandatory) \n",
      "\tref_factor {value}: set the ref_factor value; (if span value is set to NULL, optional) \n",
      "\tdetail {value}: set the detail value; (if span value is set to NULL, optional) \n",
      "\tdataMatrix_out {file}: set the output data matrix file (mandatory) \n",
      "\tvariableMetadata_out {file}: set the output variable metadata file (mandatory) \n",
      "\tgraph_output {file}: set the output graph file (mandatory) \n",
      "\trdata_output {file}: set the output Rdata file (mandatory) \n",
      "\tbatch_col_name {val}: the column name for batch. Default value is \"batch\".\n",
      "\tinjection_order_col_name {val}: the column name for the injection order. Default value is \"injectionOrder\".\n",
      "\tsample_type_col_name {val}: the column name for the sample types. Default value is \"sampleType\".\n",
      "\tsample_type_tags {val}: the tags used inside the sample type column, defined as key/value pairs separated by commas (example: blank=blank,pool=pool,sample=sample).\n",
      "\n")
  quit(status = 0)
}
| 60 | |
| 61 ##------------------------------ | |
| 62 ## init. params | |
| 63 ##------------------------------ | |
| 64 | |
# Interpretation of arguments given in command line as an R list of objects
args <- parseCommandArgs(evaluate = FALSE)

# Set default col names: a default is applied only when the caller did not
# provide the corresponding argument.
default_args <- list(
  batch_col_name = "batch",
  injection_order_col_name = "injectionOrder",
  sample_type_col_name = "sampleType",
  sample_type_tags = "blank=blank,pool=pool,sample=sample"
)
for (arg_name in setdiff(names(default_args), names(args))) {
  args[[arg_name]] <- default_args[[arg_name]]
}

# Parse sample type tags: "key=value,key=value,..." becomes a named list.
tag_pairs <- strsplit(strsplit(args$sample_type_tags, ",")[[1]], "=")

# A pair without "=" (or an empty pair) has no value part; reject it with an
# explicit message instead of failing later on kv[[2]].
malformed <- vapply(tag_pairs, function(kv) length(kv) < 2L, logical(1))
if (any(malformed)) {
  stop("Option sample_type_tags must be a comma-separated list of key=value ",
       "pairs (example: blank=blank,pool=pool,sample=sample).")
}

sample.type.tags <- list()
for (kv in tag_pairs) {
  sample.type.tags[[kv[[1]]]] <- kv[[2]]
}

# The three keys below are read later through args$sample_type_tags.
if (!all(c("pool", "blank", "sample") %in% names(sample.type.tags))) {
  stop("All tags pool, blank and sample must be defined in option sample_type_tags.")
}
args$sample_type_tags <- sample.type.tags
| 84 | |
| 85 ##------------------------------ | |
| 86 ## init. functions | |
| 87 ##------------------------------ | |
| 88 | |
# Source one or more R files located relative to the directory of the running
# script.
#
# ...: file paths, relative to the directory holding this script.
# Returns NULL invisibly. Calling it without any file is a no-op.
# Stops with an explicit message when the script location cannot be found on
# the command line (no "--file=" token, e.g. when not launched with Rscript).
source_local <- function(...) {
  rel_paths <- list(...)
  if (length(rel_paths) == 0L) {
    return(invisible(NULL))
  }

  argv <- commandArgs(trailingOnly = FALSE)
  # Anchor the pattern: only a token that starts with "--file=" carries the
  # script path.
  file_arg <- grep("^--file=", argv, value = TRUE)
  if (length(file_arg) == 0L) {
    stop("source_local: cannot locate the script directory ",
         "(no --file= argument on the command line).", call. = FALSE)
  }
  base_dir <- dirname(sub("^--file=", "", file_arg[1L]))

  for (rel_path in rel_paths) {
    source(file.path(base_dir, rel_path))
  }
  invisible(NULL)
}
| 94 #Import the different functions | |
| 95 source_local("Normalisation_QCpool.r","easyrlibrary-lib/RcheckLibrary.R","easyrlibrary-lib/miniTools.R") | |
| 96 | |
| 97 | |
| 98 ## Reading of input files | |
# Both input tables are tab-separated with a header line; column names are
# kept as written and '#' is not treated as a comment character.
idsample <- read.table(
  args$sampleMetadata,
  header = TRUE, sep = "\t", check.names = FALSE, comment.char = ""
)
iddata <- read.table(
  args$dataMatrix,
  header = TRUE, sep = "\t", check.names = FALSE, comment.char = ""
)

### Table match check
# match2() and check.err() are not defined in this file; presumably they come
# from the helper files loaded through source_local().
table.check <- match2(iddata, idsample, "sample")
if (length(table.check) > 1) {
  check.err(table.check)
}

### StockID
# stockID() returns a list from which the (possibly rewritten) tables and the
# identifier correspondence are extracted.
stock <- stockID(iddata, idsample, "sample")
iddata <- stock$dataMatrix
idsample <- stock$Metadata
samp.id <- stock$id.match

### Checking mandatory variables
required_cols <- c(
  args$sample_type_col_name,
  args$injection_order_col_name,
  args$batch_col_name
)
missing_cols <- required_cols[!(required_cols %in% colnames(idsample))]

# One message chunk per missing column, appended after the initial "".
mand.check <- c("", unlist(lapply(missing_cols, function(mandcol) {
  c("\nError: no '", mandcol, "' column in sample metadata.\n",
    "Note: table must include this exact column name (it is case-sensitive).\n")
})))

if (length(mand.check) > 1) {
  mand.check <- c(
    mand.check,
    "\nFor more information, see the help section or:",
    "\n http://workflow4metabolomics.org/sites/",
    "workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n"
  )
  check.err(mand.check)
}
| 124 | |
| 125 ### Formatting | |
# Sample identifiers (first metadata column) are turned into syntactic names;
# the first column of the data matrix becomes its row names.
idsample[[1]] <- make.names(idsample[[1]])
row.names(iddata) <- as.character(iddata[[1]])

### Transposition of ions data
# Every column but the first is transposed, then the resulting row names are
# prepended as an explicit first column to serve as the merge key.
idTdata <- t(iddata[, 2:ncol(iddata)])
idTdata <- data.frame(dimnames(idTdata)[[1]], idTdata)

### Merge of 2 files (fine even if the two data frames are not sorted on the same key)
id <- merge(idsample, idTdata, by.x = 1, by.y = 1)

id[[args$batch_col_name]] <- as.factor(id[[args$batch_col_name]])

# Keep only the rows tagged as pool or as sample.
sample_type <- id[[args$sample_type_col_name]]
is_pool_or_sample <- sample_type == args$sample_type_tags$pool |
  sample_type == args$sample_type_tags$sample
ids <- id[is_pool_or_sample, ]

# Number of columns of the sample metadata table.
nbid <- ncol(idsample)
| 139 | |
| 140 ### Checking the number of sample and pool | |
| 141 | |
| 142 # least 2 samples | |
# Count the rows tagged as sample; rows whose type is NA are ignored.
kept_sample_types <- ids[[args$sample_type_col_name]]
nb_samples <- sum(kept_sample_types == args$sample_type_tags$sample, na.rm = TRUE)
if (nb_samples < 2) {
  table.check <- c(table.check,
                   "\nError: less than 2 samples specified in sample metadata.",
                   "\nMake sure this is not due to errors in sampleType coding.\n")
}

# Number of pools in EVERY batch level.
# The previous loop, `for (nbB in length(levels(...)))`, ran a single
# iteration on the last index, so only the last batch was ever counted and
# all the other entries of B stayed at 0.
batch_of_row <- ids[[args$batch_col_name]]
is_pool <- kept_sample_types == args$sample_type_tags$pool
B <- vapply(
  levels(batch_of_row),
  function(batch_level) sum(batch_of_row == batch_level & is_pool, na.rm = TRUE),
  numeric(1),
  USE.NAMES = FALSE
)

# An error is reported only when no batch at all has 2 pools or more.
# NOTE(review): the section title reads "2 pools per batch for all batches";
# confirm whether `any(B < 2)` was the intended condition.
if (!any(B > 1)) {
  table.check <- c(table.check,
                   "\nError: less than 2 pools specified in each batch in sample metadata.",
                   "\nMake sure this is not due to errors in sampleType coding.\n")
}
| 157 | |
| 158 ### Factor of interest | |
factbio <- args$ref_factor

# `analyse` selects the branch below; without it the comparison would fail
# with the cryptic "argument is of length zero".
if (is.null(args$analyse)) {
  stop("Missing mandatory argument 'analyse'.", call. = FALSE)
}

if (args$analyse == "batch_correction") {
  ## Reading of Metadata Ions file
  metaion <- read.table(
    args$variableMetadata,
    header = TRUE, sep = "\t", check.names = FALSE, comment.char = ""
  )

  ## Table match check
  # Messages collected so far are reported together with the variable check.
  table.check <- c(table.check, match2(iddata, metaion, "variable"))
  check.err(table.check)

  ## variables
  detail <- args$detail
  method <- args$method

  ## outputs
  outlog <- args$graph_output

  ## Launch
  # TRUE/FALSE are spelled out: T and F are ordinary variables that can be
  # reassigned.
  res <- norm_QCpool(ids, nbid, outlog, factbio, metaion, detail,
                     FALSE, FALSE, method, args$span, args$valnull)
  save(res, file = args$rdata_output)
  write.table(reproduceID(res[[1]], res[[3]], "sample", samp.id)$dataMatrix,
              file = args$dataMatrix_out, sep = "\t",
              row.names = FALSE, quote = FALSE)
  write.table(res[[2]], file = args$variableMetadata_out, sep = "\t",
              row.names = FALSE, quote = FALSE)
} else {
  ## error check
  check.err(table.check)

  ## outputs
  out_graph_pdf <- args$out_graph_pdf
  out_preNormSummary <- args$out_preNormSummary

  ## Launch
  plotsituation(ids, nbid, out_graph_pdf, out_preNormSummary, factbio, args$span)
}

rm(args)
