Mercurial > repos > melpetera > testtest
comparison Testtest/GCMS-test_analyze.R @ 0:40de28c7d3fb draft
Uploaded
author | melpetera |
---|---|
date | Thu, 23 Nov 2017 08:50:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:40de28c7d3fb |
---|---|
1 # author: Pauline Ribeyre | |
2 | |
3 | |
4 ##################### | |
5 # required packages # | |
6 ##################### | |
7 | |
8 library("metaMS") # provides "runGC" function | |
9 | |
10 | |
11 ############ | |
12 # analysis # | |
13 ############ | |
14 | |
peakspectra_table <- function(GC_results, file_title) {
  # Saves the pseudospectra in one tabular (tab-separated) file, in long
  # format: one row per ion of each pseudospectrum.
  #
  # Args:
  #   GC_results: list with a "PseudoSpectra" element. Each pseudospectrum is
  #     read through its Name, rt, rt.sd and pspectrum fields; pspectrum must
  #     be convertible to a data frame with "mz" and "maxo" columns.
  #   file_title: path of the file to write.
  #
  # Output columns: name, rt, rt.sd, mz, maxo.

  ions_per_spectrum <- lapply(GC_results$PseudoSpectra, function(ps) {
    spectrum <- data.frame(ps$pspectrum)

    # A spectrum without any ion contributes no row. Iterating over
    # 1:nrow(spectrum) would visit c(1, 0) and emit a spurious all-NA row.
    if (nrow(spectrum) == 0) {
      return(NULL)
    }

    # The scalar metadata is recycled over all the ions of the spectrum.
    data.frame(name = ps$Name,
               rt = ps$rt,
               rt.sd = ps$rt.sd,
               mz = spectrum$mz,
               maxo = spectrum$maxo)
  })

  # Drop the empty spectra and the list names so that rbind() only receives
  # unnamed data frames.
  ions_per_spectrum <- unname(Filter(Negate(is.null), ions_per_spectrum))
  all_ions <- do.call(rbind, ions_per_spectrum)

  write.table(all_ions,
              file = file_title,
              quote = FALSE,
              row.names = FALSE,
              sep = "\t")

}
54 | |
55 | |
my_runGC <- function(n, cdf_files, titles_to_test, settings_to_test, nSlaves = 20) {
  # Runs the data analysis for one settings set and records the results.
  #
  # Nothing is computed when "Peak_tables/<title>.tsv" already exists, so an
  # interrupted series of tests can be resumed without redoing finished runs.
  #
  # Args:
  #   n: index of the current test, to select the corresponding title and settings set.
  #   cdf_files: list of the data files' names.
  #   titles_to_test: list of titles (one for each settings set) (concatenation of the values taken by the varied parameters).
  #   settings_to_test: list of settings sets for runGC.
  #   nSlaves: value forwarded to the "nSlaves" argument of runGC (default 20).
  #
  # Files written (the three folders must already exist):
  #   Peak_tables/<title>.tsv    peak table ordered by retention time, with an
  #                              extra "nb_zeros" column
  #   RDatas/<title>.RData       the GC_results object
  #   Pseudospectra/<title>.msp  pseudospectra written by write.msp
  #   Pseudospectra/<title>.tsv  pseudospectra written by peakspectra_table

  # Loaded inside the function as well: runGC_vary_parameters_parallel ships
  # this function to cluster workers through parLapplyLB.
  library("metaMS")

  settings <- settings_to_test[[n]]
  title <- titles_to_test[n]

  print(title)

  peak_table_file <- paste0("Peak_tables/", title, ".tsv")

  if (!file.exists(peak_table_file)) {

    # run
    GC_results <- runGC(files = cdf_files, settings = settings, returnXset = TRUE, nSlaves = nSlaves)

    # order the result table by retention time
    GC_results$PeakTable <- GC_results$PeakTable[order(GC_results$PeakTable[, "rt"]), ]
    peak_table <- GC_results$PeakTable

    # count the zero intensities of each feature
    # NOTE(review): assumes the intensity columns start at column 5, one
    # column per data file - confirm against the PeakTable layout of runGC.
    sample_cols <- seq(5, length.out = length(cdf_files))
    # drop = FALSE keeps a two-dimensional object when there is only one data
    # file; otherwise the selection collapses to a vector and the row-wise
    # count fails.
    peak_table_values <- peak_table[, sample_cols, drop = FALSE]
    peak_table$nb_zeros <- rowSums(peak_table_values == 0)
    zeros_per_l <- sum(peak_table$nb_zeros) / nrow(peak_table)

    # record the table in a file
    # /!\ a very long title can make the connection fail to open
    write.table(peak_table, file = peak_table_file, sep = "\t", row.names = FALSE)

    # record the RData
    save(GC_results, file = paste0("RDatas/", title, ".RData"))

    # record the pseudospectra in files (.msp and .tsv)
    write.msp(GC_results$PseudoSpectra, file = paste0("Pseudospectra/", title, ".msp"), newFile = TRUE)
    peakspectra_table(GC_results, paste0("Pseudospectra/", title, ".tsv"))

    cat(paste(zeros_per_l, "zeros per line.\n\n"))

  } # end if

}
103 | |
104 | |
runGC_vary_parameters_parallel <- function(nb_cores, cdf_files, settings, vary) {
  # Calculates the number of sets of parameters and runs the analysis on several cores.
  #
  # Args:
  #   nb_cores: maximum number of cores to use.
  #   cdf_files: list of the data files' names.
  #   settings: default settings for runGC() (not used by this function's body).
  #   vary: list of parameters to vary and the values each parameter must take.
  #
  # The titles and settings sets actually run are NOT derived from "settings"
  # and "vary" here: they are read from the variables "titles_to_test" and
  # "settings_to_test", which must exist in the calling environment chain
  # (presumably built by the script that sources this file - TODO confirm).
  #
  # Side effects: creates the "RDatas", "Peak_tables" and "Pseudospectra"
  # folders in the working directory and prints the elapsed time.

  # calculate the number of possibilities with the parameters' ranges
  # NOTE(review): the values of each parameter are taken from its 3rd element
  # onwards; the first two elements presumably identify the parameter.
  nb_possibilites <- 1
  for (param in vary) {
    # Negative indexing yields an empty range for entries shorter than 3,
    # where 3:length(param) would count backwards.
    range_param <- param[-(1:2)]
    nb_possibilites <- nb_possibilites * length(range_param)
  }
  cat("Settings variations:", nb_possibilites, "combinations.\n")

  dir.create("RDatas", showWarnings = FALSE) # folder where the RDatas will be saved
  dir.create("Peak_tables", showWarnings = FALSE) # folder where the peak tables will be saved
  dir.create("Pseudospectra", showWarnings = FALSE) # folder where the pseudospectra will be saved

  # A cluster cannot be created with zero workers: stop here when there is
  # nothing to run.
  if (length(titles_to_test) == 0) {
    message("No settings set to test.")
    return(invisible(NULL))
  }

  time.start <- Sys.time() # start the timer

  # run the function on several cores, never more than there are tests
  nb_cores <- min(nb_cores, length(titles_to_test))
  cluster <- parallel::makeCluster(nb_cores)
  tryCatch({
    parallel::parLapplyLB(cluster, seq_along(titles_to_test), my_runGC,
                          cdf_files, titles_to_test, settings_to_test)
    print(titles_to_test)
  }, finally = {
    # always release the workers, even when one of the runs failed
    parallel::stopCluster(cluster)
  })

  time.end <- Sys.time() # stop the timer
  Tdiff <- difftime(time.end, time.start)
  print(Tdiff)

}
142 | |
143 | |
144 ############ | |
145 # main # | |
146 ############ | |
147 | |
# Entry point: nb_cores, cdf_files, settings and vary_list are expected to be
# defined before this script is run (they are not created in this file).
runGC_vary_parameters_parallel(
  nb_cores = nb_cores,
  cdf_files = cdf_files,
  settings = settings,
  vary = vary_list
)
149 |