annotate lib.r @ 4:9fa5856f6184 draft

planemo upload commit 98cb24e57c0ce8ac3294e8c7e87aee337ac29f62
author lecorguille
date Thu, 28 Jul 2016 11:11:38 -0400
parents 9f1e18bc8ce3
children 03fdfbd914ab
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
9f1e18bc8ce3 planemo upload commit ceb25d29a013b58d3476323f202276e7c876648a-dirty
lecorguille
parents:
diff changeset
##
## Launch the IPO functions to find the best parameters for xcmsSet.
## Only a sample of the whole dataset is used, to save time.
##
## directory:        folder holding the raw files, the class of a file being
##                   the name of its parent folder ("KO/ko15.CDF" -> "KO").
##                   The working directory is changed to this folder and is
##                   NOT restored, so a relative parametersOutput and the
##                   "IPO_results" sub-folder are resolved against it.
## parametersOutput: path of the tab-separated file receiving the best settings
## listArguments:    named list; "method" selects the IPO default parameter
##                   set, and any entry named like one of those parameters
##                   replaces its default value
## samplebyclass:    number of files randomly drawn in each class; a class
##                   with fewer files is kept whole; 0 or less keeps every file
##
## Stops with an explicit error when no file is found in the directory.
##
ipo4xcmsSet <- function(directory, parametersOutput, listArguments, samplebyclass=4) {
    setwd(directory)

    files <- list.files(".", recursive=TRUE) # "KO/ko15.CDF" "KO/ko16.CDF" "WT/wt15.CDF" "WT/wt16.CDF"
    if (length(files) == 0) {
        # Fail here rather than deep inside IPO with an obscure message
        stop("No file found in the directory '", directory, "': IPO cannot be run without samples.")
    }
    files_classes <- basename(dirname(files)) # "KO", "KO", "WT", "WT"

    mzmlfile <- files
    if (samplebyclass > 0) {
        # random selection of N files for IPO in each class
        mzmlfile <- unlist(lapply(unique(files_classes), function(class_i) {
            files_class_i <- files[files_classes == class_i]
            if (samplebyclass > length(files_class_i)) {
                files_class_i
            } else {
                sample(files_class_i, samplebyclass)
            }
        }), use.names=FALSE)
    }
    #@TODO: else, must we keep the RData to been use directly by group?

    cat("\t\tSamples used:\n")
    print(mzmlfile)

    # get default parameters of IPO
    peakpickingParameters <- getDefaultXcmsSetStartingParams(listArguments[["method"]])

    # filter listArguments to only get relevant parameters and complete with those that are not declared
    declared_by_user <- names(listArguments) %in% names(peakpickingParameters)
    missing_defaults <- !(names(peakpickingParameters) %in% names(listArguments))
    peakpickingParametersUser <- c(listArguments[declared_by_user], peakpickingParameters[missing_defaults])
    peakpickingParametersUser[["verbose.columns"]] <- TRUE

    #peakpickingParametersUser$profparam <- list(step=0.005) #not yet used by IPO have to think of it for futur improvement
    # some images are generated by IPO (subdir argument)
    resultPeakpicking <- optimizeXcmsSet(mzmlfile, peakpickingParametersUser, subdir="IPO_results")

    # export, without the technical parameters
    best_parameters <- resultPeakpicking$best_settings$parameters
    resultPeakpicking_best_settings_parameters <- best_parameters[!(names(best_parameters) %in% c("nSlaves","verbose.columns"))]
    write.table(resultPeakpicking_best_settings_parameters, file=parametersOutput, sep="\t", row.names=FALSE) #can be read by user

}
9f1e18bc8ce3 planemo upload commit ceb25d29a013b58d3476323f202276e7c876648a-dirty
lecorguille
parents:
diff changeset
44
9f1e18bc8ce3 planemo upload commit ceb25d29a013b58d3476323f202276e7c876648a-dirty
lecorguille
parents:
diff changeset
45
9f1e18bc8ce3 planemo upload commit ceb25d29a013b58d3476323f202276e7c876648a-dirty
lecorguille
parents:
diff changeset
46
9f1e18bc8ce3 planemo upload commit ceb25d29a013b58d3476323f202276e7c876648a-dirty
lecorguille
parents:
diff changeset
47
9f1e18bc8ce3 planemo upload commit ceb25d29a013b58d3476323f202276e7c876648a-dirty
lecorguille
parents:
diff changeset
##
## Check that xcms will find every raw data file present on the filesystem.
##
## The list of files R can see (built the way xcms looks for its input files)
## is compared with the list returned by the system command `find`. Any raw
## file reported by `find` but unknown to R is written on stderr and the
## function stops.
##
## directory: folder (relative to the working directory) holding the raw files
##
## Returns NULL invisibly when every file is found.
##
#@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
checkFilesCompatibilityWithXcms <- function(directory) {
    cat("Checking files filenames compatibilities with xmcs...\n")
    # WHAT XCMS WILL FIND
    filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
    filepattern <- paste(paste0("\\.", filepattern, "$"), collapse="|")
    info <- file.info(directory)
    listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
    files <- c(directory[!info$isdir], listed)
    files_abs <- file.path(getwd(), files)
    exists <- file.exists(files_abs)
    files[exists] <- files_abs[exists]
    files[exists] <- sub("//", "/", files[exists])

    # WHAT IS ON THE FILESYSTEM
    # The search root is built from getwd(), like files_abs above, and quoted
    # so that spaces or shell metacharacters in the path cannot break the command.
    find_cmd <- paste0("find ", shQuote(file.path(getwd(), directory)), " -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"")
    filesystem_filepaths <- system(find_cmd, intern=TRUE)
    filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=TRUE)]

    # COMPARISON
    not_imported <- filesystem_filepaths[!(filesystem_filepaths %in% files)]
    if (length(not_imported) > 0) {
        write("\n\nERROR: List of the files which will not be imported by xcmsSet", stderr())
        write(not_imported, stderr())
        stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
    }
}
9f1e18bc8ce3 planemo upload commit ceb25d29a013b58d3476323f202276e7c876648a-dirty
lecorguille
parents:
diff changeset
77
9f1e18bc8ce3 planemo upload commit ceb25d29a013b58d3476323f202276e7c876648a-dirty
lecorguille
parents:
diff changeset
78
9f1e18bc8ce3 planemo upload commit ceb25d29a013b58d3476323f202276e7c876648a-dirty
lecorguille
parents:
diff changeset
79
9f1e18bc8ce3 planemo upload commit ceb25d29a013b58d3476323f202276e7c876648a-dirty
lecorguille
parents:
diff changeset
##
## Check that the XML files are well-formed (no special characters breaking
## the structure, no truncated file).
##
## Every regular file found under the directory whose name matches '*.*ml*'
## (case-insensitive; hidden files and paths containing "conda-env" are
## skipped) is given to `xmllint --nonet --noout`. Each file for which the
## command returns a non-zero status is written on stderr and the function stops.
## NOTE(review): `xmllint` must be available on the PATH; a missing command
## also yields a non-zero status, so every candidate file would be reported.
##
## directory: folder holding the raw files
##
## Returns NULL invisibly when no file is reported.
##
#@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
checkXmlStructure <- function (directory) {
    cat("Checking XML structure...\n")

    # `find -exec` hands each path to the inner shell as "$1", so names with
    # spaces are kept intact and no bash-only syntax is needed; the directory
    # is quoted for the same reason.
    cmd <- paste(
        "find", shQuote(directory),
        "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'",
        "-exec sh -c 'xmllint --nonet --noout \"$1\" > /dev/null 2>&1 || printf \"%s\\n\" \"$1\"' sh {} \\;"
    )
    capture <- system(cmd, intern=TRUE)

    if (length(capture) > 0) {
        write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
        write(capture, stderr())
        stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
    }

}