Mercurial > repos > ethevenot > batchcorrection
comparison batch_correction_wrapper.R @ 3:2e3a23dd6c24 draft default tip
Uploaded
author | melpetera |
---|---|
date | Thu, 28 Feb 2019 05:12:34 -0500 |
parents | 57edfd3943ab |
children |
comparison
equal
deleted
inserted
replaced
2:57edfd3943ab | 3:2e3a23dd6c24 |
---|---|
1 #!/usr/bin/Rscript --vanilla --slave --no-site-file | |
2 | |
3 ################################################################################################ | |
4 # batch_correction_wrapper # | |
5 # # | |
6 # Author: Marion LANDI / Jean-Francois MARTIN / Melanie Petera # | |
7 # User: Galaxy # | |
8 # Original data: -- # | |
9 # Starting date: 22-07-2014 # | |
10 # Version 1: 22-07-2014 # | |
11 # Version 2: 08-12-2014 # | |
12 # Version 2.1: 09-01-2015 modification in Error message of sample matching # | |
13 # Version 2.2: 16-03-2015 inclusion of miniTools' functions for special characters # | |
14 # Version 2.90: 18-08-2015 new parameter valnull # | |
15 # Version 2.91: 25-08-2016 error message improvement # | |
16 # # | |
17 # # | |
18 # Input files: dataMatrix.txt ; sampleMetadata.txt ; variableMetadata.txt (for DBC) # | |
19 # Output files: graph_output.pdf ; corrected table ; diagnostic table # | |
20 # # | |
21 ################################################################################################ | |
22 | |
23 | |
24 library(batch) #necessary for parseCommandArgs function | |
# Interpret the arguments given on the command line as an R list of objects
# (parseCommandArgs comes from the 'batch' package loaded above).
args <- parseCommandArgs(evaluate = FALSE)
26 | |
# Source one or more R files located relative to the directory of the running
# script.
#
# Args:
#   ...: character paths, relative to the script's directory, of the files to
#        source (one file per argument).
#
# Returns:
#   NULL, invisibly. Called for the side effect of source().
#
# Raises:
#   An error when files are requested but the R command line carries no
#   `--file=` argument, since the script directory cannot then be determined.
source_local <- function(...) {
  rel_paths <- list(...)

  # Nothing to load: return before touching the command line. The previous
  # `1:length(list(...))` loop iterated over c(1, 0) in this case and failed
  # with a subscript error.
  if (length(rel_paths) == 0L) {
    return(invisible(NULL))
  }

  # The script location is read from the `--file=<path>` command-line argument.
  # The pattern is anchored so that only an argument starting with the prefix
  # is considered.
  argv <- commandArgs(trailingOnly = FALSE)
  file_arg <- grep("^--file=", argv, value = TRUE)
  if (length(file_arg) == 0L) {
    stop("source_local: no '--file=' argument found on the command line; ",
         "cannot locate the script directory", call. = FALSE)
  }
  base_dir <- dirname(sub("^--file=", "", file_arg[1L]))

  for (rel_path in rel_paths) {
    source(file.path(base_dir, rel_path))
  }
  invisible(NULL)
}
# Import the helper functions from the files located next to this script
local_dependencies <- c(
  "Normalisation_QCpool.r",
  "easyrlibrary-lib/RcheckLibrary.R",
  "easyrlibrary-lib/miniTools.R"
)
do.call(source_local, as.list(local_dependencies))
34 | |
35 | |
## Reading of input files
# Both tables are tab-separated with a header row; check.names = FALSE keeps
# the column names exactly as written in the files.
# TRUE is spelled out instead of `T`: `T` is an ordinary variable that the
# helper files sourced above could have rebound.
idsample <- read.table(args$sampleMetadata, header = TRUE, sep = "\t",
                       check.names = FALSE)
iddata <- read.table(args$dataMatrix, header = TRUE, sep = "\t",
                     check.names = FALSE)

### Table match check
# match2() is defined in the sourced helper files; its result is kept in
# table.check and extended by later checks.
# NOTE(review): check.err() is only called here when the result has more than
# one element -- presumably a single element means "no error"; confirm against
# the helper's definition.
table.check <- match2(iddata, idsample, "sample")
if (length(table.check) > 1) {
  check.err(table.check)
}

### StockID
# stockID() (sourced helper) returns a list from which the data matrix, the
# sample metadata and an identifier matching object are taken; samp.id is
# reused later when writing the corrected data matrix.
samp.id <- stockID(iddata, idsample, "sample")
iddata <- samp.id$dataMatrix
idsample <- samp.id$Metadata
samp.id <- samp.id$id.match
47 | |
### Checking mandatory variables
# The sample metadata must contain these exact (case-sensitive) column names.
required_cols <- c("sampleType", "injectionOrder", "batch")
absent_cols <- required_cols[!(required_cols %in% colnames(idsample))]

# mand.check starts with one empty string; four message pieces are appended
# for every missing column, so a length above 1 means something is missing.
mand.check <- c("", unlist(lapply(absent_cols, function(absent_col) {
  c("\nError: no '", absent_col, "' column in sample metadata.\n",
    "Note: table must include this exact column name (it is case-sensitive).\n")
})))

if (length(mand.check) > 1) {
  mand.check <- c(
    mand.check,
    "\nFor more information, see the help section or:",
    "\n http://workflow4metabolomics.org/sites/",
    "workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n"
  )
  check.err(mand.check)
}
62 | |
### Formatting
# Sample identifiers (first metadata column) are made syntactically valid.
# NOTE(review): presumably so they match the identifiers on the data-matrix
# side after stockID(); confirm against that helper.
idsample[[1]] <- make.names(idsample[[1]])
# Use the first column of the data matrix (variable identifiers) as row names.
dimnames(iddata)[[1]] <- iddata[[1]]

### Transposition of ions data
# drop = FALSE keeps a data frame even when there is a single sample column;
# without it the selection collapses to a bare vector, the sample name is lost
# and the merge below no longer joins on the sample identifier.
idTdata <- t(iddata[, 2:dim(iddata)[2], drop = FALSE])
# Prepend the sample names (row names after transposition) as first column.
idTdata <- data.frame(dimnames(idTdata)[[1]], idTdata)

### Merge of 2 files (ok even if the two dataframes are not sorted on the same key)
# Joined on the first column of each table.
id <- merge(idsample, idTdata, by.x = 1, by.y = 1)

id$batch <- as.factor(id$batch)
# Keep only pools and samples. %in% is used instead of `==` so that a missing
# sampleType is simply excluded; with `==` an NA comparison yields an NA row
# index, which injects an all-NA phantom row into ids.
ids <- id[id$sampleType %in% c("pool", "sample"), ]
# Number of sample-metadata columns.
nbid <- dim(idsample)[2]
77 | |
### Checking the number of samples and pools

# At least 2 samples
if (sum(ids$sampleType == "sample", na.rm = TRUE) < 2) {
  table.check <- c(table.check,
                   "\nError: less than 2 samples specified in sample metadata.",
                   "\nMake sure this is not due to errors in sampleType coding.\n")
}

# At least 2 pools per batch, for all batches.
# B holds the number of pools found in each batch level.
# The previous loop, `for(nbB in length(levels(ids$batch)))`, ran a single
# iteration on the last batch only, and the test `length(which(B>1))==0` asked
# whether *no* batch had 2 pools. Both contradicted the requirement stated
# above: every batch is now counted and any batch with fewer than 2 pools is
# reported.
# NOTE(review): the message text is left unchanged; its wording ("in each
# batch") may deserve an update to "in at least one batch".
batch_levels <- levels(ids$batch)
B <- vapply(batch_levels, function(batch_level) {
  sum(ids$batch == batch_level & ids$sampleType == "pool", na.rm = TRUE)
}, integer(1), USE.NAMES = FALSE)
# An empty B (no batch at all) is reported too, as the original test did.
if (length(B) == 0 || any(B < 2)) {
  table.check <- c(table.check,
                   "\nError: less than 2 pools specified in each batch in sample metadata.",
                   "\nMake sure this is not due to errors in sampleType coding.\n")
}
95 | |
### Factor of interest
factbio <- args$ref_factor


# Two run modes, selected by args$analyse:
#  - "batch_correction": reads the variable metadata, runs norm_QCpool() and
#    writes the corrected tables;
#  - anything else: runs plotsituation() only.
# TRUE/FALSE are spelled out everywhere instead of T/F, which are ordinary
# variables that sourced code could have rebound.
if (args$analyse == "batch_correction") {
  ## Reading of Metadata Ions file
  metaion <- read.table(args$variableMetadata, header = TRUE, sep = "\t",
                        check.names = FALSE)
  ## Table match check
  # Add the data matrix / variable metadata check to the accumulated messages,
  # then hand them all to check.err() (sourced helper).
  table.check <- c(table.check, match2(iddata, metaion, "variable"))
  check.err(table.check)

  ## variables
  detail <- args$detail
  method <- args$method

  ## outputs
  outlog <- args$graph_output

  ## Launch
  # norm_QCpool() is defined in the sourced Normalisation_QCpool.r; the two
  # FALSE arguments are passed positionally, exactly as before.
  res <- norm_QCpool(ids, nbid, outlog, factbio, metaion, detail,
                     FALSE, FALSE, method, args$span, args$valnull)
  save(res, file = args$rdata_output)
  # res[[1]] and res[[3]] go through reproduceID() together with samp.id
  # before the data matrix is written; res[[2]] is written as the variable
  # metadata output.
  write.table(reproduceID(res[[1]], res[[3]], "sample", samp.id)$dataMatrix,
              file = args$dataMatrix_out, sep = "\t",
              row.names = FALSE, quote = FALSE)
  write.table(res[[2]], file = args$variableMetadata_out, sep = "\t",
              row.names = FALSE, quote = FALSE)
} else {
  ## error check
  check.err(table.check)

  ## outputs
  out_graph_pdf <- args$out_graph_pdf
  out_preNormSummary <- args$out_preNormSummary

  ## Launch
  plotsituation(ids, nbid, out_graph_pdf, out_preNormSummary, factbio, args$span)
}

rm(args)