annotate Testtest/GCMS-test_analyze.R @ 0:40de28c7d3fb draft

Uploaded
author melpetera
date Thu, 23 Nov 2017 08:50:14 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
1 # author: Pauline Ribeyre
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
2
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
3
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
4 #####################
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
5 # required packages #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
6 #####################
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
7
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
8 library("metaMS") # provides "runGC" function
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
9
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
10
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
11 ############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
12 # analysis #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
13 ############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
14
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
peakspectra_table <- function(GC_results, file_title) {
  # Writes the pseudospectra of an analysis to a single tab-separated file,
  # one row per ion.
  #
  # Args:
  #   GC_results: list holding a `PseudoSpectra` element; every pseudospectrum
  #     is read through its `Name`, `rt`, `rt.sd` and `pspectrum` fields.
  #   file_title: path of the tabular file to write.
  #
  # The written columns are: name, rt, rt.sd, mz, maxo.

  ion_tables <- lapply(GC_results$PseudoSpectra, function(ps) {
    spectrum <- data.frame(ps$pspectrum)
    # A pseudospectrum without ions contributes no row; iterating over
    # 1:nrow() would visit rows 1 and 0 and emit a spurious NA row.
    if (nrow(spectrum) == 0) {
      return(NULL)
    }
    # The scalar annotations are recycled along the ion columns.
    data.frame(name = ps$Name,
               rt = ps$rt,
               rt.sd = ps$rt.sd,
               mz = spectrum$mz,
               maxo = spectrum$maxo)
  })
  ion_tables <- Filter(Negate(is.null), ion_tables)

  if (length(ion_tables) == 0) {
    # Nothing to report: still write the header so the file stays parseable.
    all_ions <- data.frame(name = character(0),
                           rt = numeric(0),
                           rt.sd = numeric(0),
                           mz = numeric(0),
                           maxo = numeric(0))
  } else {
    # unname() keeps list names from leaking into rbind() as argument names.
    all_ions <- do.call(rbind, unname(ion_tables))
  }

  write.table(all_ions,
              file = file_title,
              quote = FALSE,
              row.names = FALSE,
              sep = "\t")

}
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
54
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
55
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
my_runGC <- function(n, cdf_files, titles_to_test, settings_to_test, n_slaves = 20) {
  # Runs the data analysis for one settings set and records the results.
  #
  # Nothing is done when "Peak_tables/<title>.tsv" already exists, so an
  # interrupted sweep can be restarted without recomputing finished sets.
  #
  # Args:
  #   n: index of the current test, to select the corresponding title and settings set.
  #   cdf_files: list of the data files' names.
  #   titles_to_test: list of titles (one for each settings set) (concatenation of the values taken by the varied parameters).
  #   settings_to_test: list of settings sets for runGC.
  #   n_slaves: value forwarded to the `nSlaves` argument of runGC (default 20,
  #     the value that used to be hard-coded).
  #
  # Files written: Peak_tables/<title>.tsv, RDatas/<title>.RData,
  # Pseudospectra/<title>.msp and Pseudospectra/<title>.tsv.

  # Loaded inside the function so that the package is attached in whichever R
  # process evaluates it (the function is dispatched through parLapplyLB).
  library("metaMS")

  settings <- settings_to_test[[n]]
  title <- titles_to_test[n]

  print(title)

  peak_table_file <- paste0("Peak_tables/", title, ".tsv")
  if (file.exists(peak_table_file)) {
    return(invisible(NULL))
  }

  # run
  GC_results <- runGC(files = cdf_files, settings = settings, returnXset = TRUE, nSlaves = n_slaves)

  # order the result table by retention time
  peak_table <- GC_results$PeakTable[order(GC_results$PeakTable[, "rt"]), ]
  GC_results$PeakTable <- peak_table

  # Count the zero intensities of every row.
  # NOTE(review): assumes the per-file intensity columns start at column 5 of
  # the peak table, one column per data file - confirm against runGC output.
  sample_cols <- seq(5, length.out = length(cdf_files))
  # drop = FALSE keeps a table even with a single data file; a bare vector
  # cannot be processed row by row.
  peak_table_values <- peak_table[, sample_cols, drop = FALSE]
  peak_table$nb_zeros <- rowSums(peak_table_values == 0)
  zeros_per_l <- sum(peak_table$nb_zeros) / nrow(peak_table)

  # record the table in a file
  # /!\ a very long title can make the path too long, in which case the file
  # connection cannot be opened
  write.table(peak_table, file = peak_table_file, sep = "\t", row.names = FALSE)

  # record the RData (the settings are not saved alongside the results)
  file_title <- paste0("RDatas/", title, ".RData")
  save(GC_results, file = file_title)

  # record the pseudospectra in files (.msp and .tsv)
  file_title <- paste0("Pseudospectra/", title, ".msp")
  write.msp(GC_results$PseudoSpectra, file = file_title, newFile = TRUE)
  file_title <- paste0("Pseudospectra/", title, ".tsv")
  peakspectra_table(GC_results, file_title)

  cat(paste(zeros_per_l, "zeros per line.\n\n"))

}
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
103
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
104
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
runGC_vary_parameters_parallel <- function(nb_cores, cdf_files, settings, vary,
                                           titles_to_test = NULL,
                                           settings_to_test = NULL) {
  # Calculates the number of sets of parameters and runs the analysis on several cores.
  #
  # Args:
  #   nb_cores: maximum number of cores to use.
  #   cdf_files: list of the data files' names.
  #   settings: default settings for runGC() (not read by this function).
  #   vary: list of parameters to vary and the values each parameter must take.
  #   titles_to_test: titles of the settings sets, one per run. When NULL, the
  #     object of the same name visible from where this function was defined
  #     is used, as before.
  #   settings_to_test: settings sets for runGC, one per title. Same fallback
  #     as titles_to_test.
  #
  # Returns (invisibly) the elapsed time, or NULL when there is nothing to run.

  # Historical behaviour: both objects were read as free variables.
  enclosure <- parent.env(environment())
  if (is.null(titles_to_test)) {
    titles_to_test <- get("titles_to_test", envir = enclosure)
  }
  if (is.null(settings_to_test)) {
    settings_to_test <- get("settings_to_test", envir = enclosure)
  }

  # calculate the number of possibilities with the parameters' ranges
  # NOTE(review): assumes the values of each parameter start at its third
  # element - confirm against the code building `vary`.
  nb_possibilites <- 1
  for (param in vary) {
    # max() guards entries shorter than 3, for which 3:length(param) would
    # count backwards.
    nb_possibilites <- nb_possibilites * max(length(param) - 2, 0)
  }
  cat("Settings variations:", nb_possibilites, "combinations.\n")

  # create the folders where the RDatas, the peak tables and the
  # pseudospectra will be saved
  for (folder in c("RDatas", "Peak_tables", "Pseudospectra")) {
    dir.create(folder, showWarnings = FALSE)
  }

  nb_runs <- length(titles_to_test)
  if (nb_runs == 0) {
    # A cluster cannot be created with zero workers.
    message("No settings set to run.")
    return(invisible(NULL))
  }

  time.start <- Sys.time() # start the timer

  # run the function on several cores, never more than there are runs
  nb_cores <- min(nb_cores, nb_runs)
  cluster <- makeCluster(nb_cores)
  tryCatch({
    # my_runGC calls peakspectra_table, which is not shipped to the workers
    # when both functions live in the global environment.
    clusterExport(cluster, "peakspectra_table", envir = environment())
    parLapplyLB(cluster, seq_len(nb_runs), my_runGC, cdf_files, titles_to_test, settings_to_test)
    print(titles_to_test)
  }, finally = stopCluster(cluster)) # the workers are released even on error

  time.end <- Sys.time() # stop the timer
  Tdiff <- difftime(time.end, time.start)
  print(Tdiff)

}
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
142
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
143
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
144 ############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
145 # main #
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
146 ############
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
147
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
# Launch the parameter sweep; the four inputs are expected to be defined
# before this script is evaluated.
runGC_vary_parameters_parallel(nb_cores = nb_cores,
                               cdf_files = cdf_files,
                               settings = settings,
                               vary = vary_list)
40de28c7d3fb Uploaded
melpetera
parents:
diff changeset
149