3
|
1 #!/usr/bin/env Rscript
|
|
2
|
|
3 ################################################################################################
|
|
4 # batch_correction_wrapper #
|
|
5 # #
|
|
6 # Author: Marion LANDI / Jean-Francois MARTIN / Melanie Petera #
|
|
7 # User: Galaxy #
|
|
8 # Original data: -- #
|
|
9 # Starting date: 22-07-2014 #
|
|
10 # Version 1: 22-07-2014 #
|
|
11 # Version 2: 08-12-2014 #
|
|
12 # Version 2.1: 09-01-2015 modification in Error message of sample matching #
|
|
13 # Version 2.2: 16-03-2015 inclusion of miniTools' functions for special characters #
|
|
14 # Version 2.90: 18-08-2015 new parameter valnull #
|
|
15 # Version 2.91: 25-08-2016 error message improvement #
|
|
16 # #
|
|
17 # #
|
|
18 # Input files: dataMatrix.txt ; sampleMetadata.txt ; variableMetadata.txt (for DBC) #
|
|
19 # Output files: graph_output.pdf ; corrected table ; diagnostic table #
|
|
20 # #
|
|
21 ################################################################################################
|
|
22
|
|
23
|
|
24 library(batch) #necessary for parseCommandArgs function
|
|
25
|
|
26 ##------------------------------
|
|
27 ## test help option
|
|
28 ##------------------------------
|
|
29
|
|
# Prog. constants
# The full command line (interpreter options included) is read so that the
# script path passed through "--file=" can be recovered for the usage message.
argv.help <- commandArgs(trailingOnly = FALSE)
script.path <- sub("--file=", "", argv.help[grep("--file=", argv.help)])
prog.name <- basename(script.path)

# Test Help
# Only a stand-alone help flag (-h, --h, -help, --help) triggers the usage
# message. A plain substring search for "-h" would also fire on any argument
# that merely contains "-h", e.g. a script path or an input file name.
if (any(grepl("^--?h(elp)?$", argv.help))) {
  cat("Usage: Rscript ",
      prog.name,
      "{args} \n",
      "parameters: \n",
      "\tanalyse {val}: must be set to \"batch_correction\" \n",
      "\tdataMatrix {file}: set the input data matrix file (mandatory) \n",
      "\tsampleMetadata {file}: set the input sample metadata file (mandatory) \n",
      "\tvariableMetadata {file}: set the input variable metadata file (mandatory) \n",
      "\tmethod {opt}: set the method; can set to \"linear\", \"lowess\" or \"loess\" (mandatory) \n",
      "\tspan {condition}: set the span condition; set to \"none\" if method is set to \"linear\" (mandatory) \n",
      "\tref_factor {value}: set the ref_factor value; (if span value is set to NULL, optional) \n",
      "\tdetail {value}: set the detail value; (if span value is set to NULL, optional) \n",
      "\tdataMatrix_out {file}: set the output data matrix file (mandatory) \n",
      "\tvariableMetadata_out {file}: set the output variable metadata file (mandatory) \n",
      "\tgraph_output {file}: set the output graph file (mandatory) \n",
      "\trdata_output {file}: set the output Rdata file (mandatory) \n",
      "\tbatch_col_name {val}: the column name for batch. Default value is \"batch\".\n",
      "\tinjection_order_col_name {val}: the column name for the injection order. Default value is \"injectionOrder\".\n",
      "\tsample_type_col_name {val}: the column name for the sample types. Default value is \"sampleType\".\n",
      "\tsample_type_tags {val}: the tags used inside the sample type column, defined as key/value pairs separated by commas (example: blank=blank,pool=pool,sample=sample).\n",
      "\n")
  quit(status = 0)
}
|
|
60
|
|
61 ##------------------------------
|
|
62 ## init. params
|
|
63 ##------------------------------
|
|
64
|
|
# Interpretation of arguments given in command line as an R list of objects.
# evaluate=FALSE is passed to parseCommandArgs() from the 'batch' package.
args <- parseCommandArgs(evaluate = FALSE)

# Set default col names
if (!"batch_col_name" %in% names(args)) {
  args[["batch_col_name"]] <- "batch"
}
if (!"injection_order_col_name" %in% names(args)) {
  args[["injection_order_col_name"]] <- "injectionOrder"
}
if (!"sample_type_col_name" %in% names(args)) {
  args[["sample_type_col_name"]] <- "sampleType"
}
if (!"sample_type_tags" %in% names(args)) {
  args[["sample_type_tags"]] <- "blank=blank,pool=pool,sample=sample"
}

# Parse sample type tags
# The option is a comma separated list of key=value pairs; it is turned into a
# named list (key -> tag used in the sample type column).
sample.type.tags <- list()
for (kv in strsplit(strsplit(args$sample_type_tags, ",")[[1]], "=")) {
  # A pair without "=value" would otherwise fail with an opaque
  # "subscript out of bounds" error.
  if (length(kv) < 2) {
    stop("Malformed key/value pair in option sample_type_tags: expected key=value pairs separated by commas.")
  }
  sample.type.tags[[kv[[1]]]] <- kv[[2]]
}
if (!all(c("pool", "blank", "sample") %in% names(sample.type.tags))) {
  stop("All tags pool, blank and sample must be defined in option sample_type_tags.")
}
# From here on args$sample_type_tags is the parsed list, not the raw string.
args$sample_type_tags <- sample.type.tags
|
|
84
|
|
85 ##------------------------------
|
|
86 ## init. functions
|
|
87 ##------------------------------
|
|
88
|
|
# Source R files located relative to the directory of the running script.
#
# Args:
#   ...: file paths (character), each taken relative to the directory of the
#        script given to the interpreter through the "--file=" option.
#
# Each file is evaluated with source(). Calling the function without any file
# is a no-op.
source_local <- function(...) {
  argv <- commandArgs(trailingOnly = FALSE)
  # substring(., 8) strips the 7-character "--file=" prefix.
  base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
  # Iterating over the list itself (rather than 1:length) stays correct when
  # no file is supplied.
  for (rel_path in list(...)) {
    source(paste(base_dir, rel_path, sep = "/"))
  }
}
|
|
# Import the different functions
# (match2, check.err, stockID, ... used below are not defined in this script;
# presumably they come from these sourced files - TODO confirm)
source_local(
  "Normalisation_QCpool.r",
  "easyrlibrary-lib/RcheckLibrary.R",
  "easyrlibrary-lib/miniTools.R"
)


## Reading of input files
idsample <- read.table(
  args$sampleMetadata,
  header = TRUE, sep = "\t", check.names = FALSE, comment.char = ""
)
iddata <- read.table(
  args$dataMatrix,
  header = TRUE, sep = "\t", check.names = FALSE, comment.char = ""
)

### Table match check
table.check <- match2(iddata, idsample, "sample")
if (length(table.check) > 1) {
  check.err(table.check)
}

### StockID
# stockID() returns the (possibly modified) tables plus an identifier mapping;
# the mapping is reused when writing the corrected data matrix.
samp.id <- stockID(iddata, idsample, "sample")
iddata <- samp.id$dataMatrix
idsample <- samp.id$Metadata
samp.id <- samp.id$id.match
|
|
109
|
|
### Checking mandatory variables
# The sample type, injection order and batch columns must all be present in
# the sample metadata; one error entry is collected per missing column.
mandatory.cols <- c(args$sample_type_col_name,
                    args$injection_order_col_name,
                    args$batch_col_name)
missing.cols <- mandatory.cols[!(mandatory.cols %in% colnames(idsample))]
mand.check <- c(
  "",
  unlist(
    lapply(missing.cols, function(mandcol) {
      c("\nError: no '", mandcol, "' column in sample metadata.\n",
        "Note: table must include this exact column name (it is case-sensitive).\n")
    }),
    use.names = FALSE
  )
)
# The vector starts with a single empty string, so more than one element means
# at least one column is missing.
if (length(mand.check) > 1) {
  mand.check <- c(mand.check,
                  "\nFor more information, see the help section or:",
                  "\n http://workflow4metabolomics.org/sites/",
                  "workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n")
  check.err(mand.check)
}
|
|
124
|
|
### Formatting
# Sample identifiers are made syntactically valid R names, and the first
# column of the data matrix is used as its row names.
idsample[[1]] <- make.names(idsample[[1]])
dimnames(iddata)[[1]] <- iddata[[1]]

### Transposition of ions data
# drop = FALSE keeps a data frame (and thus the sample name) even when the
# data matrix holds a single sample column.
idTdata <- t(iddata[, 2:dim(iddata)[2], drop = FALSE])
idTdata <- data.frame(dimnames(idTdata)[[1]], idTdata)

### Merge of 2 files (ok even if the two dataframe are not sorted on the same key)
id <- merge(idsample, idTdata, by.x = 1, by.y = 1)

id[[args$batch_col_name]] <- as.factor(id[[args$batch_col_name]])
# Keep pools and samples only. %in% never returns NA, so rows with a missing
# sample type are dropped instead of being turned into all-NA rows as an
# "==" comparison would do.
ids <- id[id[[args$sample_type_col_name]] %in%
            c(args$sample_type_tags$pool, args$sample_type_tags$sample), ]
# Number of columns of the sample metadata table.
nbid <- dim(idsample)[2]
|
|
139
|
|
### Checking the number of samples and pools

# at least 2 samples
if (length(which(ids[[args$sample_type_col_name]] == args$sample_type_tags$sample)) < 2) {
  table.check <- c(table.check, "\nError: less than 2 samples specified in sample metadata.",
                   "\nMake sure this is not due to errors in sampleType coding.\n")
}

# at least 2 pools per batch for all batches
# B[k] is the number of pools found in the k-th batch level. The loop must
# visit every level: iterating over length(levels) alone only visits the last
# batch and leaves all the other counts at 0.
batch.levels <- levels(ids[[args$batch_col_name]])
B <- rep(0, length(batch.levels))
for (nbB in seq_along(batch.levels)) {
  in.batch <- which(ids[[args$batch_col_name]] == batch.levels[nbB])
  B[nbB] <- length(which(ids[in.batch, ][[args$sample_type_col_name]] == args$sample_type_tags$pool))
}
# NOTE(review): the heading above says every batch needs 2 pools, but this
# condition only raises an error when no batch at all has 2 or more pools -
# confirm which one is intended.
if (length(which(B > 1)) == 0) {
  table.check <- c(table.check, "\nError: less than 2 pools specified in each batch in sample metadata.",
                   "\nMake sure this is not due to errors in sampleType coding.\n")
}
|
|
157
|
|
### Factor of interest
factbio <- args$ref_factor


if (args$analyse == "batch_correction") {
  ## Reading of Metadata Ions file
  metaion <- read.table(
    args$variableMetadata,
    header = TRUE, sep = "\t", check.names = FALSE, comment.char = ""
  )
  ## Table match check
  # Accumulated sample/pool errors and variable matching errors are passed to
  # check.err() together.
  table.check <- c(table.check, match2(iddata, metaion, "variable"))
  check.err(table.check)

  ## variables
  detail <- args$detail
  method <- args$method

  ## outputs
  outlog <- args$graph_output

  ## Launch
  res <- norm_QCpool(
    ids, nbid, outlog, factbio, metaion, detail,
    FALSE, FALSE, method, args$span, args$valnull
  )
  save(res, file = args$rdata_output)
  # res[[1]] and res[[3]] are passed to reproduceID() together with the
  # identifier mapping kept from stockID(); its dataMatrix element is written
  # as the output data matrix, and res[[2]] as the output variable metadata.
  write.table(
    reproduceID(res[[1]], res[[3]], "sample", samp.id)$dataMatrix,
    file = args$dataMatrix_out, sep = "\t", row.names = FALSE, quote = FALSE
  )
  write.table(
    res[[2]],
    file = args$variableMetadata_out, sep = "\t", row.names = FALSE, quote = FALSE
  )
} else {
  ## error check
  check.err(table.check)

  ## outputs
  out_graph_pdf <- args$out_graph_pdf
  out_preNormSummary <- args$out_preNormSummary

  ## Launch
  plotsituation(ids, nbid, out_graph_pdf, out_preNormSummary, factbio, args$span)
}

rm(args)
|