Mercurial > repos > melpetera > testtest
diff Testtest/GCMS-test_analyze.R @ 0:40de28c7d3fb draft
Uploaded
author | melpetera |
---|---|
date | Thu, 23 Nov 2017 08:50:14 -0500 |
parents | |
children |
line wrap: on
line diff
# File: Testtest/GCMS-test_analyze.R
# author: Pauline Ribeyre


#####################
# required packages #
#####################

library("metaMS") # provides "runGC" function


############
# analysis #
############

peakspectra_table <- function(GC_results, file_title) {
  # Saves the pseudospectra as a single tab-separated table (one row per ion).
  #
  # Args:
  #   GC_results: result object whose $PseudoSpectra element is a list of
  #     pseudospectra; each one is read through its $Name, $rt, $rt.sd and
  #     $pspectrum (columns "mz" and "maxo") fields.
  #   file_title: path of the file to write.
  #
  # Returns:
  #   The written data frame, invisibly. Columns: name, rt, rt.sd, mz, maxo.

  per_spectrum <- lapply(GC_results$PseudoSpectra, function(ps) {
    spectrum <- data.frame(ps$pspectrum)
    if (nrow(spectrum) == 0) {
      # Nothing to report for a pseudospectrum without ions.
      return(NULL)
    }
    # Scalars (name, rt, rt.sd) are recycled over the ions of the spectrum.
    data.frame(
      name = ps$Name,
      rt = ps$rt,
      rt.sd = ps$rt.sd,
      mz = spectrum$mz,
      maxo = spectrum$maxo
    )
  })
  # unname() keeps rbind() from building row names out of list names.
  per_spectrum <- unname(Filter(Negate(is.null), per_spectrum))

  if (length(per_spectrum) == 0) {
    # Still write a header-only file so downstream readers get the columns.
    df <- data.frame(
      name = character(0),
      rt = numeric(0),
      rt.sd = numeric(0),
      mz = numeric(0),
      maxo = numeric(0)
    )
  } else {
    df <- do.call(rbind, per_spectrum)
  }

  write.table(df,
              file = file_title,
              quote = FALSE,
              row.names = FALSE,
              sep = "\t")

  invisible(df)
}


my_runGC <- function(n, cdf_files, titles_to_test, settings_to_test,
                     nSlaves = 20) {
  # Runs the data analysis and records the results.
  #
  # Nothing is done when "Peak_tables/<title>.tsv" already exists, so an
  # interrupted batch can be restarted without recomputing finished tests.
  #
  # Args:
  #   n: index of the current test, to select the corresponding title and
  #     settings set.
  #   cdf_files: list of the data files' names.
  #   titles_to_test: list of titles (one for each settings set)
  #     (concatenation of the values taken by the varied parameters).
  #   settings_to_test: list of settings sets for runGC.
  #   nSlaves: value forwarded to runGC(nSlaves = ). NOTE(review): this
  #     function is itself run on several workers at once, so the total number
  #     of processes may be nb_cores * nSlaves -- confirm this is intended.
  #
  # Returns:
  #   NULL, invisibly. Results are written to the "Peak_tables", "RDatas" and
  #   "Pseudospectra" folders, which must already exist.

  # Loaded here because this function is executed in worker processes.
  library("metaMS")

  settings <- settings_to_test[[n]]
  title <- titles_to_test[[n]]

  print(title)

  peak_table_file <- file.path("Peak_tables", paste0(title, ".tsv"))
  if (file.exists(peak_table_file)) {
    return(invisible(NULL))
  }

  # run
  GC_results <- runGC(files = cdf_files, settings = settings,
                      returnXset = TRUE, nSlaves = nSlaves)

  # order the result table by retention time
  rt_order <- order(GC_results$PeakTable[, "rt"])
  GC_results$PeakTable <- GC_results$PeakTable[rt_order, ]
  peak_table <- GC_results$PeakTable

  # Count, per row, the zero values in the per-file columns.
  # NOTE(review): assumes the per-file columns start at column 5 -- confirm
  # against the PeakTable layout returned by runGC.
  # drop = FALSE keeps a table even when there is a single data file.
  sample_cols <- seq(5, length.out = length(cdf_files))
  peak_table_values <- peak_table[, sample_cols, drop = FALSE]
  peak_table$nb_zeros <- rowSums(peak_table_values == 0)
  zeros_per_l <- sum(peak_table$nb_zeros) / nrow(peak_table)

  # record the table in a file
  # /!\ title length -> cannot open connexion
  write.table(peak_table, file = peak_table_file, sep = "\t",
              row.names = FALSE)

  # record the RData
  save(GC_results, file = file.path("RDatas", paste0(title, ".RData")))

  # record the pseudospectra in files (.msp and .tsv)
  write.msp(GC_results$PseudoSpectra,
            file = file.path("Pseudospectra", paste0(title, ".msp")),
            newFile = TRUE)
  peakspectra_table(GC_results,
                    file.path("Pseudospectra", paste0(title, ".tsv")))

  cat(paste(zeros_per_l, "zeros per line.\n\n"))

  invisible(NULL)
}


runGC_vary_parameters_parallel <- function(nb_cores, cdf_files, settings,
                                           vary) {
  # Calculates the number of sets of parameters and runs the analysis on
  # several cores.
  #
  # Args:
  #   nb_cores: maximum number of cores to use.
  #   cdf_files: list of the data files' names.
  #   settings: default settings for runGC(). Not used in this function; kept
  #     for compatibility with existing callers.
  #   vary: list of parameters to vary and the values each parameter must take.
  #     Only elements 3 and beyond of each entry are counted as values.
  #
  # NOTE(review): the titles and settings sets actually run are read from the
  # variables `titles_to_test` and `settings_to_test`, which are not defined
  # in this file and must exist in the calling environment.
  #
  # Returns:
  #   The elapsed time (difftime), invisibly via print().

  if (!exists("titles_to_test") || !exists("settings_to_test")) {
    stop("'titles_to_test' and 'settings_to_test' must be defined before ",
         "calling runGC_vary_parameters_parallel().", call. = FALSE)
  }

  # calculate the number of possibilities with the parameters' ranges
  nb_possibilites <- 1
  for (param in vary) {
    # Elements 1 and 2 are not values; an entry with no value counts as 0.
    nb_possibilites <- nb_possibilites * max(length(param) - 2L, 0L)
  }
  cat("Settings variations:", nb_possibilites, "combinations.\n")

  # create the folders where the RDatas, the peak tables and the
  # pseudospectra will be saved
  for (folder in c("RDatas", "Peak_tables", "Pseudospectra")) {
    dir.create(folder, showWarnings = FALSE)
  }

  time.start <- Sys.time() # start the timer

  nb_tests <- length(titles_to_test)
  if (nb_tests > 0) {
    # run the function on several cores (never more workers than tests)
    cluster <- parallel::makeCluster(min(nb_cores, nb_tests))
    tryCatch({
      # Workers are separate R sessions: the helper called by my_runGC has to
      # be sent to them explicitly.
      parallel::clusterExport(cluster, "peakspectra_table",
                              envir = environment())
      parallel::parLapplyLB(cluster, seq_len(nb_tests), my_runGC,
                            cdf_files, titles_to_test, settings_to_test)
    }, finally = parallel::stopCluster(cluster)) # released even on error
  }
  print(titles_to_test)

  time.end <- Sys.time() # stop the timer
  Tdiff <- difftime(time.end, time.start)
  print(Tdiff)
}


############
#   main   #
############

# nb_cores, cdf_files, settings and vary_list are expected to be defined by
# the script that sources this file.
runGC_vary_parameters_parallel(nb_cores, cdf_files, settings, vary_list)