Mercurial > repos > lecorguille > ipo
annotate lib.r @ 0:b2032600d98f draft
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
| author | lecorguille |
|---|---|
| date | Tue, 24 May 2016 19:49:14 -0400 |
| parents | |
| children | 508ab373b524 |
| rev | line source |
|---|---|
|
0
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
1 |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
2 ## |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
3 ## This function launches the IPO functions to find the best parameters for xcmsSet |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
4 ## A sample among the whole dataset is used to save time |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
5 ## |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
## Run the IPO optimisation of the xcmsSet peak-picking parameters and write
## the best settings to a tab-separated file.
##
## Only a subset of the dataset is used, to save time: up to `samplebyclass`
## files are drawn at random from each class, the class of a file being the
## name of the folder that directly contains it.
##
## Arguments:
##   directory        folder holding the raw files. NOTE: it becomes the working
##                    directory and is NOT restored on exit, so a relative
##                    `parametersOutput` is resolved against it.
##   parametersOutput path of the tab-separated file receiving the best settings.
##   listArguments    named list; `method` selects the IPO default parameter
##                    set, while `ppm`, `min_peakwidth`, `max_peakwidth` and
##                    `nSlaves` override the matching defaults when supplied.
##   samplebyclass    number of files drawn per class; a class holding fewer
##                    files is used entirely, and a value <= 0 disables the
##                    sampling (every file is used).
ipo4xcmsSet <- function(directory, parametersOutput, listArguments, samplebyclass=4) {
    setwd(directory)
    files <- list.files(".", recursive=TRUE)    # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
    files_classes <- basename(dirname(files))   # "KO", "KO", "WT", "WT"

    mzmlfile <- files
    if (samplebyclass > 0) {
        # random selection of N files for IPO in each class
        mzmlfile <- NULL
        for (class_i in unique(files_classes)) {
            files_class_i <- files[files_classes == class_i]
            if (samplebyclass > length(files_class_i)) {
                # not enough files in this class: keep them all
                mzmlfile <- c(mzmlfile, files_class_i)
            } else {
                mzmlfile <- c(mzmlfile, sample(files_class_i, samplebyclass))
            }
        }
    }
    #TODO: else, must we keep the RData so that it can be used directly by group?

    cat("\t\tSamples used:\n")
    print(mzmlfile)

    paramsPP <- getDefaultXcmsSetStartingParams(listArguments[["method"]]) #load default parameters of IPO

    # user defined new parameters
    # Assigning NULL to a list element deletes it, so a default is only
    # replaced when the user actually supplied a value for it.
    for (param_name in c("ppm", "min_peakwidth", "max_peakwidth", "nSlaves")) {
        user_value <- listArguments[[param_name]]
        if (!is.null(user_value)) {
            paramsPP[[param_name]] <- user_value
        }
    }

    #paramsPP$profparam <- list(step=0.005) #not yet used by IPO; to be considered for a future improvement
    resultPPpos <- optimizeXcmsSet(mzmlfile, paramsPP, subdir="IPO_results") #some images generated by IPO
    write.table(resultPPpos$best_settings$parameters, file=parametersOutput, sep="\t", row.names=FALSE) #can be read by user

}
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
43 |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
44 |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
45 |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
46 |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
47 ## |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
48 ## This function checks whether xcms will find all the files |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
49 ## |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
50 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
## Compare the raw files R can list under `directory` with the ones the
## filesystem reports through `find`, and stop when some of them are missing
## from the R listing.
##
## Arguments:
##   directory  path, relative to the current working directory, of the folder
##              (or file) to check.
##
## Raises an error, after printing the offending paths on stderr, when at least
## one file reported by `find` is absent from the R listing.
checkFilesCompatibilityWithXcms <- function(directory) {
    cat("Checking files filenames compatibilities with xmcs...\n")

    # WHAT XCMS WILL FIND
    # (per the original author's comment, this section reproduces the way xcms lists its input files)
    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
    filepattern <- paste(paste0("\\.", filepattern, "$"), collapse = "|")
    info <- file.info(directory)
    listed <- list.files(directory[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
    files <- c(directory[!info$isdir], listed)
    files_abs <- file.path(getwd(), files)
    exists <- file.exists(files_abs)
    files[exists] <- files_abs[exists]
    files[exists] <- sub("//", "/", files[exists])

    # WHAT IS ON THE FILESYSTEM
    # The search root is built from getwd(), like `files_abs` above, so both
    # listings share the same prefix; it is shell-quoted so that spaces or
    # shell metacharacters in the path cannot break the command.
    search_root <- paste0(getwd(), "/", directory)
    find_cmd <- paste("find", paste(shQuote(search_root), collapse = " "),
                      "-not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"")
    filesystem_filepaths <- system(find_cmd, intern = TRUE)
    filesystem_filepaths <- filesystem_filepaths[grepl(filepattern, filesystem_filepaths, perl = TRUE)]

    # COMPARISON
    not_imported <- filesystem_filepaths[!(filesystem_filepaths %in% files)]
    if (length(not_imported) > 0) {
        write("\n\nERROR: List of the files which will not be imported by xcmsSet", stderr())
        write(not_imported, stderr())
        stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")

    }
}
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
76 |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
77 |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
78 |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
79 ## |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
80 ## This function checks whether the XML files contain special characters. It also checks their integrity and completeness. |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
81 ## |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
82 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
|
b2032600d98f
planemo upload commit ddb46a9ade365cbe01b3ff9f50dffa0140136632
lecorguille
parents:
diff
changeset
|
## Validate every XML-like file found under `directory` with `xmllint` and stop
## when at least one of them is rejected.
##
## Arguments:
##   directory  folder searched recursively for files whose name matches
##              '*.*ml*' (case-insensitive); hidden files and paths containing
##              'conda-env' are skipped.
##
## Raises an error, after printing the rejected paths on stderr, when `xmllint`
## returns a non-zero status for any file.
checkXmlStructure <- function (directory) {
    cat("Checking XML structure...\n")

    # The directory is shell-quoted so that spaces or shell metacharacters in
    # the path cannot break the command.
    find_cmd <- paste("find", paste(shQuote(directory), collapse = " "),
                      "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'")
    xml_files <- system(find_cmd, intern = TRUE)

    # One xmllint run per file, checked from R rather than in a shell loop, so
    # the result does not depend on the IFS / $'...' behaviour of /bin/sh.
    is_invalid <- vapply(xml_files, function(xml) {
        status <- system2("xmllint", c("--nonet", "--noout", shQuote(xml)), stdout = FALSE, stderr = FALSE)
        status > 0
    }, logical(1), USE.NAMES = FALSE)
    capture <- xml_files[is_invalid]

    if (length(capture) > 0) {
        #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
        write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
        write(capture, stderr())
        stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
    }

}
