annotate Testtest/GCMS-test_output.R @ 0:40de28c7d3fb draft

Uploaded
author melpetera
date Thu, 23 Nov 2017 08:50:14 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
1 # author: Pauline Ribeyre
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
2
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
3
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
4 #####################
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
5 # required packages #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
6 #####################
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
7
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
8 library("parallel") # provides cluster methods and "parLapplyLB" function
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
9 library("grDevices") # provides jpeg handling methods
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
10
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
11
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
12 ############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
13 # data #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
14 ############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
15
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
update_list_of_file_names <- function() {
  # Writes the list of test titles in the file designated by "source_file_names" (one title per line).
  #
  # The titles are the names of the ".RData" files found in ./RDatas, without the directory
  # and without the extension. The previous content of the output file is overwritten.
  # Entries of ./RDatas that do not end with ".RData" are ignored instead of being truncated
  # by fixed character offsets.
  #
  # Uses the global variable "source_file_names" (path of the file to write).

  directory <- "RDatas"
  extension <- "\\.RData$"

  # list.files() without full.names returns the bare file names, so only the extension has to be removed
  files <- list.files(directory, pattern = extension)
  titles <- sub(extension, "", files)

  # writeLines() truncates the file first; an empty directory produces an empty file
  writeLines(titles, source_file_names)
}
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
29
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
30
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
titles_to_columns <- function(indicators) {
  # Parse the "title" column of the indicator dataframe to separate the different parameters and their values.
  #
  # A title is expected to look like "name1=value1_name2=value2": it is split on "_" and each
  # piece is split on "=". If any title is exactly "default_settings", no column is added.
  #
  # Args:
  #   indicators: dataframe (one row per test) containing the results; must have a "title" column.
  #
  # Returns:
  #   new_indicators: copy of "indicators" with one column added for each parameter that varied.
  #                   The new columns are inserted right after the first column and are named after
  #                   the parameters, with the first letter in upper case.

  titles <- as.character(indicators$title)

  if ("default_settings" %in% titles)
    return (indicators)

  # The values are accumulated in a named list (and not in variables created with assign/get):
  # a parameter called e.g. "Filter" or "Map" would otherwise be confused with the base function
  # of the same name, because exists() and get() also search the enclosing environments.
  values <- list()
  for (title in titles) {
    for (param in strsplit(title, "_", fixed = TRUE)[[1]]) {
      parts <- strsplit(param, "=", fixed = TRUE)[[1]]
      name <- parts[1]
      if (is.na(name) || !nzchar(name))
        next # empty piece (e.g. two consecutive "_")

      # first letter in upper case: kept so that the names of the columns do not change
      substr(name, 1, 1) <- toupper(substr(name, 1, 1))

      # parts[2] is NA when the piece contains no "="
      values[[name]] <- c(values[[name]], parts[2])
    }
  }

  param_names <- names(values)
  if (length(param_names) == 0)
    return (indicators)

  # add the columns to the dataframe
  new_indicators <- indicators
  for (name in param_names) {
    new_indicators <- cbind(new_indicators, values[[name]])
    names(new_indicators)[ncol(new_indicators)] <- name
  }

  # order the dataframe's columns: first column, parameters, then the remaining original columns
  nb_original <- ncol(indicators)
  col_order <- c(1, nb_original + seq_along(param_names), seq_len(nb_original)[-1])
  new_indicators <- new_indicators[, col_order, drop = FALSE]

  return (new_indicators)
}
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
87
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
88
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
create_summary <- function(nb_cores, count_duplicates = FALSE) {
  # Calculates the quality criteria of every test listed in the file "source_file_names",
  # by running create_summary_parallel() on a cluster, and gathers them in one dataframe.
  #
  # Args:
  #   nb_cores: maximum number of cores to use (reduced to the number of tests if larger).
  #   count_duplicates: calculate the number of duplicates obtained by each test (slow).
  #
  # Returns:
  #   A dataframe (one row per test) containing the results:
  #   title, nb pseudospectra, nb zeros/line, nb ions/intensity range, then one column per
  #   checked ion (if "check_ions" is TRUE) and "count_duplicates" and "nb_ions" (if
  #   count_duplicates is TRUE). The title is also split into one column per parameter by
  #   titles_to_columns().
  #
  # Uses the global variables "source_file_names", "check_ions" and "ions_name".
  # Prints the elapsed time.

  time.start <- Sys.time() # start the timer

  file_names <- readLines(source_file_names)

  # create the folder where the details of the duplicates will be saved
  dir.create("count_duplicates", showWarnings = FALSE)

  if (length(file_names) == 0)
    stop("There are no files to generate the output from.")

  # run the function on several cores (never more workers than tests)
  nb_cores <- min(nb_cores, length(file_names))
  cluster <- makeCluster(nb_cores)
  # registered immediately so that the workers are also stopped when a test fails
  on.exit(stopCluster(cluster), add = TRUE)
  summaries <- parLapplyLB(cluster, seq_along(file_names), create_summary_parallel,
                           file_names = file_names, count_duplicates = count_duplicates)

  # concatenate the results obtained by all the cores (one row per test)
  indicators <- data.frame(do.call(rbind, lapply(summaries, t)))

  col_names <- c("title", "nb_pseudospectra", "zeros_per_line", "f0to10E3", "f10E3to10E4", "f10E4to10E5",
                 "f10E5to10E6", "f10E6to10E7", "f10E7")

  if (check_ions)
    col_names <- c(col_names, ions_name)

  names(indicators)[seq_along(col_names)] <- col_names
  # the two duplicate-related values are always the last two columns
  if (count_duplicates)
    names(indicators)[(ncol(indicators) - 1):ncol(indicators)] <- c("count_duplicates", "nb_ions")

  indicators <- titles_to_columns(indicators)

  time.end <- Sys.time() # stop the timer
  print(difftime(time.end, time.start))

  return (indicators)
}
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
160
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
161
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
create_summary_parallel <- function(n, file_names, count_duplicates = FALSE) {
  # Calculates the quality criteria of one test from its peak table (Peak_tables/<title>.tsv)
  # and its saved results (RDatas/<title>.RData).
  #
  # Args:
  #   n: index of the current test, to select the corresponding title.
  #   file_names: list of titles (one for each test).
  #   count_duplicates: calculate the number of duplicates obtained by the test (slow); the
  #                     details are written in count_duplicates/<title>.tsv.
  #
  # Returns:
  #   A character vector containing, in this order: title, number of rows of the peak table,
  #   mean number of zeros per row, number of rows per range of mean intensity (6 values),
  #   one value per checked ion (result of is_ion_present()), and, only if count_duplicates
  #   is TRUE, the number of duplicates and the total number of ions.
  #
  # Uses the global variables "source_spectrum", "nb_ions_to_check", "ions_name", "ions_rt"
  # and "ions_mzs". count_duplicates_function() and is_ion_present() are not defined here:
  # presumably they come from the file sourced below - TODO confirm.

  source(source_spectrum, environment())

  # limits of the intensity ranges
  intensities_x <- c(1000, 10000, 100000, 1000000, 10000000)

  this_title <- file_names[n]

  # calculate the number of zeros
  file_title <- paste0("Peak_tables/", this_title, ".tsv")
  peak_table <- read.table(file_title, sep = "\t", header = TRUE)
  # the values start at the 5th column; drop = FALSE keeps a dataframe when there is a single one
  peak_table_values <- peak_table[, 5:ncol(peak_table), drop = FALSE]
  nb_lines <- nrow(peak_table)
  zeros_per_line <- round(sum(peak_table$nb_zeros) / nb_lines, 4)

  # count the number of ions by intensity range
  # Ranges are [0, 10E3), [10E3, 10E4), ..., [10E7, Inf): a mean equal to a limit is counted in
  # the upper range (it used to be counted nowhere). Rows without any value have no mean and
  # are not counted (they used to be counted in every range).
  mean_intensities <- rowMeans(peak_table_values, na.rm = TRUE)
  mean_intensities <- mean_intensities[!is.na(mean_intensities)]
  intensities_y <- tabulate(findInterval(mean_intensities, intensities_x) + 1,
                            nbins = length(intensities_x) + 1)

  # load the saved results; the file is expected to define "GC_results", used below
  file_title <- paste0("RDatas/", this_title, ".RData")
  load(file_title)

  # count the number of duplicates and record them in a file
  if (count_duplicates) {
    file_title <- paste0("count_duplicates/", this_title, ".tsv")
    duplicates <- data.frame(count_duplicates_function(GC_results))
    names(duplicates) <- c("mz_min", "mz_max", "rt", "count_duplicates")
    write.table(duplicates, file = file_title, sep = "\t", row.names = FALSE)
    # kept in its own variable: storing it in "count_duplicates" turned the flag to FALSE when
    # there was no duplicate, and the two values below were then missing from the result
    nb_duplicates <- nrow(duplicates)

    nb_ions <- 0
    for (ps in GC_results$PseudoSpectra) {
      nb_ions <- nb_ions + nrow(ps$pspectrum)
    }
  }

  summary_row <- c(this_title, nb_lines, zeros_per_line, intensities_y)

  # check the presence of ions
  if (nb_ions_to_check > 0) {
    for (i in seq_len(nb_ions_to_check)) {
      name <- ions_name[[i]]
      rt <- ions_rt[[i]]
      mzs <- ions_mzs[[i]]
      cat("Check:", name, rt, mzs, "\n")

      summary_row <- c(summary_row, is_ion_present(GC_results, rt = rt, mz_list = mzs))
    }
  }

  if (count_duplicates)
    summary_row <- c(summary_row, nb_duplicates, nb_ions)

  return (summary_row)
}
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
253
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
254
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
255 ############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
256 # graphs #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
257 ############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
258
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
ions_per_intensity <- function(indicators) {
  # For each test, plots the number of ions for each range of intensity (one barplot per test,
  # one page per barplot) in the pdf file designated by the global variable "intensity_graph_out".
  #
  # Args:
  #   indicators: dataframe (one row per test) containing the results; must have the columns
  #               "title", "f0to10E3", "f10E3to10E4", "f10E4to10E5", "f10E5to10E6",
  #               "f10E6to10E7" and "f10E7".
  #
  # Returns:
  #   The value of dev.off() (the pdf device is closed).

  pdf(intensity_graph_out)
  device <- dev.cur()
  # close the pdf device even if a plot fails (no effect when it was closed normally)
  on.exit(if (device %in% dev.list()) dev.off(device), add = TRUE)
  par(mar = c(5.1, 4.1, 5, 2.1))

  range_names <- c("f0to10E3", "f10E3to10E4", "f10E4to10E5", "f10E5to10E6", "f10E6to10E7", "f10E7")

  for (row_index in seq_len(nrow(indicators))) {

    indic <- indicators[row_index, ]

    # as.character() first: the columns may be factors or character vectors depending on how
    # the dataframe was built, and both must give the numeric value that was written
    intensities_y <- vapply(range_names,
                            function(colname) as.numeric(as.character(indic[[colname]])),
                            numeric(1), USE.NAMES = FALSE)

    # title of the plot: the parameters of the test, two per line
    pieces <- strsplit(as.character(indic$title), "_")[[1]]
    plot_title <- ""
    for (piece_index in seq_along(pieces)) {
      plot_title <- paste(plot_title, pieces[[piece_index]])
      if (piece_index %% 2 == 0)
        plot_title <- paste(plot_title, "\n")
      else
        plot_title <- paste(plot_title, " ")
    }

    barplot(intensities_y, names.arg = range_names, xlab = "intensity", ylab = "number of ions",
            main = plot_title, cex.main = 0.8)

  }

  dev.off(device)
}
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
300
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
301
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
302 graph_results <- function(indicators, criteria) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
303 # Plots the results.
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
304 #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
305 # Args:
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
306 # indicators: dataframe (one row per test) containing the results.
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
307
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
308 first_criteria <- grep("nb_pseudospectra", names(indicators))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
309 nb_params <- first_criteria - 2
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
310 this_criteria <- grep(criteria, names(indicators))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
311
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
312 # values taken by each parameter
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
313 values <- list()
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
314 for (i in 2:(1 + nb_params)) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
315 lev <- unique(indicators[,i])
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
316 values[[i - 1]] <- lev
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
317 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
318
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
319 length <- lapply(values, length)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
320
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
321 # indexes of the parameters taking the most values
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
322 longest_1 <- which.max(length); length[longest_1] <- -1
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
323 longest_2 <- which.max(length); length[longest_2] <- -1
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
324 longest_3 <- which.max(length); length[longest_3] <- -1
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
325
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
326 # indexes of the other parameters
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
327 shortest <- which(length != -1)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
328
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
329 # all combinations of the values taken by these parameters
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
330 combinations <- list()
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
331 for (s in shortest) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
332 page <- indicators[,s + 1]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
333 combinations[[length(combinations) + 1]] <- sort(as.numeric(as.character(unique(page))))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
334 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
335 names(combinations) <- names(indicators)[shortest + 1]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
336 combinations <- expand.grid(combinations)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
337
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
338 # save the plots in a pdf file
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
339 pdf(compare_graph_out)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
340 nb_rows <- length(values[longest_3][[1]])
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
341 nb_cols <- length(values[longest_2][[1]])
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
342 par(mfrow = c(nb_rows, nb_cols), mar = c(4.5,4.5,5,1))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
343
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
344 # plot parameters
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
345 x <- sort(as.numeric(as.character(unique(indicators[,longest_1 + 1]))))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
346 min_zeros <- min(as.numeric(as.character(indicators$zeros_per_line)))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
347 max_zeros <- max(as.numeric(as.character(indicators$zeros_per_line)))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
348
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
349 for (rowi in 1:nrow(combinations)) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
350 row <- combinations[rowi,]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
351 title <- ""
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
352 lines <- indicators
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
353
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
354 for (coli in 1:length(row)) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
355 value <- as.numeric(as.character(row[coli]))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
356 title <- paste(title, names(row[coli]), "=", value)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
357 if (coli %% 2 == 0)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
358 title <- paste(title, "\n")
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
359 else
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
360 title <- paste(title, " ")
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
361 lines <- lines[lines[names(row[coli])] == value,]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
362 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
363
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
364 # values taken by the parameters
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
365 in_plot <- lines[,longest_1 + 1]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
366 vertical <- lines[,longest_2 + 1]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
367 horizontal <- lines[,longest_3 + 1]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
368
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
369 # for each horizontal value
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
370 for (horiz in sort(unique(horizontal))) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
371
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
372 # for each vertical value
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
373 for (vertic in sort(unique(vertical))) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
374
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
375 y <- c()
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
376 for (this_y in sort(unique(in_plot))) {
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
377 # line <- line[page == p & horizontal == horiz & vertical == vertic & in_plot == this_y,]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
378 line <- lines[horizontal == horiz & vertical == vertic & in_plot == this_y,]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
379 if (nrow(line) != 1)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
380 stop("To plot the results, each set of the parameters' values must be represented exactly 1 time")
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
381
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
382 value <- line[,this_criteria]
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
383 value <- as.numeric(as.character(value))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
384 y <- c(y, value)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
385 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
386
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
387 this_title <- paste(title,
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
388 names(indicators)[longest_3 + 1], "=", horiz, "\n",
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
389 names(indicators)[longest_2 + 1], "=", vertic)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
390
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
391 # plot this graph
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
392 plot(x, y, ylim = c(min_zeros, max_zeros),
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
393 type = "b",
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
394 xlab = names(indicators)[longest_1 + 1],
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
395 ylab = criteria,
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
396 main = this_title,
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
397 cex.main = 0.8)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
398
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
399 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
400
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
401 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
402
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
403 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
404
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
405 par(mfrow = c(1,1))
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
406 dev.off()
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
407
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
408 }
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
409
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
410
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
411 ############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
412 # main #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
413 ############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
414
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
# Main driver of the script.
# Steps: refresh the list of result files, build the summary table, write it
# to `summary_out`, draw the two sets of graphs, then archive the per-run
# output files.
# Relies on objects defined outside this section: nb_cores, count_duplicates,
# summary_out, peakspectra_out and count_duplicates_out.
update_list_of_file_names()
summary <- create_summary(nb_cores, count_duplicates = count_duplicates)

# settings_list <- summary[2][[1]]
# indicators_ <- summary[1][[1]]
indicators_ <- summary

# Sort by number of zeros per line. The column is converted to numeric first
# (the plotting code earlier in this file applies the same conversion), so
# that values stored as factor or character are ordered by value rather than
# by level or lexical order.
indicators <- indicators_[
  order(as.numeric(as.character(indicators_$zeros_per_line))),
]
# indicators_ <- indicators[order(as.numeric(row.names(indicators))),] # order back to row numbers

# Record the summary in a file, leaving out the title and the intensity-range
# columns. drop = FALSE keeps a data frame (and therefore the column header in
# the written file) even if a single column remains.
indicators_to_write <- indicators[
  ,
  !names(indicators) %in%
    c("title", "f0to10E3", "f10E3to10E4", "f10E4to10E5",
      "f10E5to10E6", "f10E6to10E7", "f10E7"),
  drop = FALSE
]
write.table(indicators_to_write,
            file = summary_out,
            quote = FALSE,
            row.names = FALSE,
            sep = "\t")

# ions per intensity graph
ions_per_intensity(indicators_)

# plots to compare each set of parameters
graph_results(indicators_, criteria = "zeros_per_line")

# Zip of pseudospectra .msp and .tsv files.
# The destination path is expanded and shell-quoted so that spaces or shell
# metacharacters in it cannot break (or alter) the mv command.
system('cd Pseudospectra ; ls . | grep -e "msp$" -e "tsv$" | zip -r -@ "peakspectra_out.zip"')
system(paste("cd Pseudospectra ; mv peakspectra_out.zip",
             shQuote(path.expand(peakspectra_out))))

# Zip of count_duplicates .tsv files (only produced when duplicates were
# counted); the destination path is quoted for the same reason as above.
if (count_duplicates) {
  system('cd count_duplicates ; ls . | grep "tsv$" | zip -r -@ "count_duplicates_out.zip"')
  system(paste("cd count_duplicates ; mv count_duplicates_out.zip",
               shQuote(path.expand(count_duplicates_out))))
}
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
450