0
|
1 # author: Pauline Ribeyre
|
|
2
|
|
3
|
|
4 #####################
|
|
5 # required packages #
|
|
6 #####################
|
|
7
|
|
8 library("metaMS") # provides "runGC" function
|
|
9
|
|
10
|
|
11 ############
|
|
12 # analysis #
|
|
13 ############
|
|
14
|
|
peakspectra_table <- function(GC_results, file_title) {
  # Saves the pseudospectra in one tab-separated file (one row per ion).
  #
  # Args:
  #   GC_results: list holding a "PseudoSpectra" element; each pseudospectrum is
  #     read through its "Name", "rt", "rt.sd" and "pspectrum" fields, and
  #     "pspectrum" must provide "mz" and "maxo" columns.
  #   file_title: path of the tabular file to write.

  # build one table per pseudospectrum; unname() keeps the element names from
  # being interpreted as arguments of rbind() below
  ion_tables <- lapply(unname(GC_results$PseudoSpectra), function(ps) {
    spectrum <- data.frame(ps$pspectrum)
    if (nrow(spectrum) == 0) {
      # a pseudospectrum without ions contributes no row
      return(NULL)
    }
    # name, rt and rt.sd are recycled over the ions of the pseudospectrum
    data.frame(name = ps$Name,
               rt = ps$rt,
               rt.sd = ps$rt.sd,
               mz = spectrum$mz,
               maxo = spectrum$maxo)
  })
  ion_tables <- Filter(Negate(is.null), ion_tables)

  if (length(ion_tables) > 0) {
    df <- do.call(rbind, ion_tables)
  } else {
    # keep the header so that the file is still a valid (empty) table
    df <- data.frame(name = character(0),
                     rt = numeric(0),
                     rt.sd = numeric(0),
                     mz = numeric(0),
                     maxo = numeric(0))
  }

  write.table(df,
              file = file_title,
              quote = FALSE,
              row.names = FALSE,
              sep = "\t")

}
|
|
54
|
|
55
|
|
my_runGC <- function(n, cdf_files, titles_to_test, settings_to_test, nSlaves = 20) {
  # Runs the data analysis for one settings set and records the results.
  # Nothing is computed when the peak table of this settings set already exists.
  #
  # Args:
  #   n: index of the current test, to select the corresponding title and settings set.
  #   cdf_files: list of the data files' names.
  #   titles_to_test: list of titles (one for each settings set) (concatenation of the values taken by the varied parameters).
  #   settings_to_test: list of settings sets for runGC.
  #   nSlaves: value passed to the "nSlaves" argument of runGC (defaults to 20, the value used so far).

  # loaded inside the function as well: the function is dispatched through
  # parLapplyLB, and a freshly started worker has no package attached
  library("metaMS")

  settings <- settings_to_test[[n]]
  title <- titles_to_test[n]

  print(title)

  peak_table_file <- paste0("Peak_tables/", title, ".tsv")
  if (file.exists(peak_table_file)) {
    # this settings set has already been processed
    return(invisible(NULL))
  }

  # run
  GC_results <- runGC(files = cdf_files, settings = settings, returnXset = TRUE, nSlaves = nSlaves)

  # order the result table by retention time
  GC_results$PeakTable <- GC_results$PeakTable[order(GC_results$PeakTable[, "rt"]), ]
  peak_table <- GC_results$PeakTable

  # count the zeros of each line over the columns starting at the 5th one (one
  # column per data file); drop = FALSE keeps a table even with a single file
  sample_columns <- 4 + seq_along(cdf_files)
  peak_table_values <- peak_table[, sample_columns, drop = FALSE]
  peak_table$nb_zeros <- rowSums(peak_table_values == 0)
  zeros_per_l <- sum(peak_table$nb_zeros) / nrow(peak_table)

  # record the table in a file
  write.table(peak_table, file = peak_table_file, sep = "\t", row.names = FALSE) # /!\ title length -> cannot open connection

  # record the RData
  file_title <- paste0("RDatas/", title, ".RData")
  # save(GC_results, settings, file = file_title)
  save(GC_results, file = file_title)

  # record the pseudospectra in files (.msp and .tsv)
  file_title <- paste0("Pseudospectra/", title, ".msp")
  write.msp(GC_results$PseudoSpectra, file = file_title, newFile = TRUE)
  file_title <- paste0("Pseudospectra/", title, ".tsv")
  peakspectra_table(GC_results, file_title)

  cat(paste(zeros_per_l, "zeros per line.\n\n"))

}
|
|
103
|
|
104
|
|
runGC_vary_parameters_parallel <- function(nb_cores, cdf_files, settings, vary) {
  # Calculates the number of sets of parameters and runs the analysis on several cores.
  #
  # Args:
  #   nb_cores: maximum number of cores to use.
  #   cdf_files: list of the data files' names.
  #   settings: default settings for runGC().
  #   vary: list of parameters to vary and the values each parameter must take.
  #
  # NOTE(review): "titles_to_test" and "settings_to_test" are neither arguments
  # of this function nor created in it; they are looked up in the enclosing
  # environment and must exist before the call. "settings" is not used in the
  # body -- confirm how the settings sets are meant to be built.

  # calculate the number of possibilities with the parameters' ranges
  nb_possibilites <- 1
  for (param in vary) {
    # the values to test start at the third element of each entry
    nb_values <- max(length(param) - 2, 0)
    nb_possibilites <- nb_possibilites * nb_values
  }
  cat("Settings variations:", nb_possibilites, "combinations.\n")

  dir.create("RDatas", showWarnings = FALSE) # create the folder where the RDatas will be saved
  dir.create("Peak_tables", showWarnings = FALSE) # create the folder where the peak tables will be saved
  # dir.create("Ions_per_intensity", showWarnings = FALSE) # create the folder where the nb of ions per intensity range will be saved
  dir.create("Pseudospectra", showWarnings = FALSE) # create the folder where the pseudospectra will be saved

  nb_tests <- length(titles_to_test)
  if (nb_tests == 0) {
    # a cluster cannot be created with 0 node
    cat("No settings set to test.\n")
    return(invisible(NULL))
  }

  time.start <- Sys.time() # start the timer

  # run the function on several cores (never more cores than tests)
  nb_cores <- min(nb_cores, nb_tests)
  cluster <- parallel::makeCluster(nb_cores) # , outfile = "")
  tryCatch({
    # my_runGC calls peakspectra_table, which the workers do not know by default
    parallel::clusterExport(cluster, "peakspectra_table", envir = environment(my_runGC))
    parallel::parLapplyLB(cluster, seq_len(nb_tests), my_runGC, cdf_files, titles_to_test, settings_to_test)
    print(titles_to_test)
  }, finally = parallel::stopCluster(cluster)) # the workers are stopped even if a test fails

  time.end <- Sys.time() # stop the timer
  Tdiff <- difftime(time.end, time.start)
  print(Tdiff)

}
|
|
142
|
|
143
|
|
144 ############
|
|
145 # main #
|
|
146 ############
|
|
147
|
|
# Launch the parameter sweep. nb_cores, cdf_files, settings and vary_list are
# not defined in the visible part of this script and must already exist.
runGC_vary_parameters_parallel(
  nb_cores = nb_cores,
  cdf_files = cdf_files,
  settings = settings,
  vary = vary_list
)
|
|
149
|