annotate lib.r @ 1:508ab373b524 draft

planemo upload commit 2c68962f28b58980db71ac15b6a3e75bbcf7885b-dirty
author lecorguille
date Mon, 25 Jul 2016 09:49:16 -0400
parents b2032600d98f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
1 ##
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
2 ## This function launches IPO functions to get the best parameters for xcmsSet
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
3 ## A sample among the whole dataset is used to save time
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
4 ##
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
## Run the IPO optimisation of the xcmsSet parameters on the raw files found
## under `directory` and write the best settings to `parametersOutput`.
##
## directory         folder that holds the raw files, one sub-folder per class
##                   (e.g. "KO/ko15.CDF", "WT/wt15.CDF").
##                   NOTE: it becomes the working directory (setwd) and the
##                   previous working directory is NOT restored.
## parametersOutput  path of the tab-separated file receiving the best
##                   parameters; it is opened after the setwd(), so a relative
##                   path is resolved against `directory`.
## listArguments     named list; "method" selects the IPO default parameter
##                   set, and "ppm", "min_peakwidth", "max_peakwidth",
##                   "nSlaves" override the matching defaults when present.
## samplebyclass     number of files randomly drawn in each class to save
##                   time; a value <= 0 keeps every file.
ipo4xcmsSet <- function(directory, parametersOutput, listArguments, samplebyclass=4) {
    setwd(directory)
    #@TODO: check the behaviour of directory = "." ?
    files <- list.files(".", recursive = TRUE)  # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
    files_classes <- basename(dirname(files))   # "KO", "KO", "WT", "WT"

    mzmlfile <- files
    if (samplebyclass > 0) {
        # random selection of N files for IPO in each class; a class with
        # fewer than N files is taken entirely
        picked_by_class <- lapply(unique(files_classes), function(class_i) {
            files_class_i <- files[files_classes == class_i]
            if (samplebyclass > length(files_class_i)) {
                files_class_i
            } else {
                sample(files_class_i, samplebyclass)
            }
        })
        # unlist(list()) is NULL, as in the case where no file was found
        mzmlfile <- unlist(picked_by_class)
    }
    #TODO: else, must we keep the RData to been use directly by group?

    cat("\t\tSamples used:\n")
    print(mzmlfile)

    paramsPP <- getDefaultXcmsSetStartingParams(listArguments[["method"]]) #load default parameters of IPO

    # user defined new parameters
    # Only override when the user actually supplied a value: assigning NULL
    # to a list element would silently delete the IPO default.
    for (param_name in c("ppm", "min_peakwidth", "max_peakwidth", "nSlaves")) {
        user_value <- listArguments[[param_name]]
        if (!is.null(user_value)) {
            paramsPP[[param_name]] <- user_value
        }
    }

    #paramsPP$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement
    resultPPpos <- optimizeXcmsSet(mzmlfile, paramsPP, subdir="IPO_results") #some images generated by IPO
    write.table(resultPPpos$best_settings$parameters, file=parametersOutput, sep="\t", row.names=FALSE) #can be read by user

}
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
43
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
44
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
45
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
46
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
47 ##
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
48 ## This function checks whether xcms will find all the files
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
49 ##
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
50 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
## Compare the raw files that xcmsSet would pick up under `directory` with the
## files that `find` reports on the filesystem, and stop when some of them
## would not be imported.
##
## directory  path, relative to the current working directory, of the folder
##            (or file) handed to xcmsSet.
## Returns NULL invisibly; raises an error (after listing the offending paths
## on stderr) when at least one matching file on disk is absent from the xcms
## listing.
checkFilesCompatibilityWithXcms <- function(directory) {
    cat("Checking files filenames compatibilities with xmcs...\n")
    # WHAT XCMS WILL FIND
    # (file discovery as xcmsSet is expected to do it; keep these steps as is)
    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
    filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
    info <- file.info(directory)
    listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
    files <- c(directory[!info$isdir], listed)
    files_abs <- file.path(getwd(), files)
    exists <- file.exists(files_abs)
    files[exists] <- files_abs[exists]
    files[exists] <- sub("//","/",files[exists])

    # WHAT IS ON THE FILESYSTEM
    # The search root is built from getwd(), like files_abs above, so both
    # listings share the same prefix. It is shell-quoted so that spaces or
    # metacharacters in the path cannot break or alter the command.
    search_root <- shQuote(file.path(getwd(), directory), type = "sh")
    find_cmd <- paste0("find ", search_root, " -not -name '\\.*' -not -path '*conda-env*' -type f")
    filesystem_filepaths <- system(find_cmd, intern = TRUE)
    filesystem_filepaths <- filesystem_filepaths[grepl(filepattern, filesystem_filepaths, perl = TRUE)]

    # COMPARISON
    not_imported <- filesystem_filepaths[!(filesystem_filepaths %in% files)]
    if (length(not_imported) > 0) {
        write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
        write(not_imported,stderr())
        stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
    }
    invisible(NULL)
}
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
76
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
77
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
78
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
79 ##
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
80 ## This function checks whether the XML files contain special characters. It also checks their integrity and completeness.
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
81 ##
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
82 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
b2032600d98f planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff changeset
## Validate every XML-like raw file found under `directory` with xmllint and
## stop when at least one of them is not well-formed.
##
## directory  folder searched recursively by `find` for files whose name
##            matches '*.*ml*' (case-insensitive); hidden files and paths
##            containing 'conda-env' are skipped.
## Returns NULL invisibly; raises an error (after listing the offending files
## on stderr) when xmllint exits with a non-zero status for any file.
checkXmlStructure <- function (directory) {
    cat("Checking XML structure...\n")

    # The directory is shell-quoted so that a path with spaces or shell
    # metacharacters reaches `find` as one argument. "$xml" is quoted in the
    # echo so that the reported name is not glob-expanded.
    cmd <- paste("IFS=$'\n'; for xml in $(find", shQuote(directory, type = "sh"), "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo \"$xml\";fi; done;")
    capture <- system(cmd, intern = TRUE)

    if (length(capture) > 0) {
        #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
        write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
        write(capture, stderr())
        stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
    }
    invisible(NULL)
}