Mercurial > repos > melpetera > testtest
view Testtest/GCMS-test_analyze.R @ 0:40de28c7d3fb draft
Uploaded
author | melpetera |
---|---|
date | Thu, 23 Nov 2017 08:50:14 -0500 |
parents | |
children |
line wrap: on
line source
# author: Pauline Ribeyre

#####################
# required packages #
#####################

library("metaMS") # provides "runGC" function


############
# analysis #
############

peakspectra_table <- function(GC_results, file_title) {
  # Saves the pseudospectra in a tab-separated file, one row per ion.
  #
  # Args:
  #   GC_results: result object whose PseudoSpectra element is a list of
  #     pseudospectra; each one is read through its Name, rt, rt.sd and
  #     pspectrum elements.
  #   file_title: path of the file to write.
  #
  # The written table has the columns name, rt, rt.sd, mz and maxo. The first
  # three are repeated on every ion row of the pseudospectrum they belong to.

  per_spectrum <- lapply(GC_results$PseudoSpectra, function(ps) {
    spectrum <- data.frame(ps$pspectrum)
    nb_ions <- nrow(spectrum)
    # The per-spectrum values are repeated explicitly so that a pseudospectrum
    # without any ion yields an empty table instead of an error or an NA row.
    data.frame(
      name = rep(ps$Name, length.out = nb_ions),
      rt = rep(ps$rt, length.out = nb_ions),
      rt.sd = rep(ps$rt.sd, length.out = nb_ions),
      mz = spectrum$mz,
      maxo = spectrum$maxo
    )
  })

  # unname() keeps list names from being interpreted as arguments of rbind().
  df <- do.call(rbind, unname(per_spectrum))

  write.table(df, file = file_title, quote = FALSE, row.names = FALSE,
              sep = "\t")
}


my_runGC <- function(n, cdf_files, titles_to_test, settings_to_test,
                     nb_slaves = 20) {
  # Runs the data analysis for one settings set and records the results.
  #
  # Args:
  #   n: index of the current test, to select the corresponding title and
  #     settings set.
  #   cdf_files: list of the data files' names.
  #   titles_to_test: list of titles (one for each settings set)
  #     (concatenation of the values taken by the varied parameters).
  #   settings_to_test: list of settings sets for runGC.
  #   nb_slaves: value passed to runGC() as nSlaves (default 20, the value
  #     that used to be hard-coded).
  #
  # Nothing is run when "Peak_tables/<title>.tsv" already exists. Otherwise
  # the following files are written (the folders must already exist):
  #   Peak_tables/<title>.tsv, RDatas/<title>.RData,
  #   Pseudospectra/<title>.msp and Pseudospectra/<title>.tsv.

  # Attached here as well as at the top of the file; presumably so that the
  # package is available when this function is executed in a worker process.
  library("metaMS")

  settings <- settings_to_test[[n]]
  title <- titles_to_test[n]
  print(title)

  # /!\ a long title gives a long file name -> "cannot open connection"
  peak_table_file <- paste0("Peak_tables/", title, ".tsv")
  if (file.exists(peak_table_file)) {
    return(invisible(NULL))
  }

  # run
  GC_results <- runGC(files = cdf_files, settings = settings,
                      returnXset = TRUE, nSlaves = nb_slaves)

  # order the result table by retention time
  rt_order <- order(GC_results$PeakTable[, "rt"])
  GC_results$PeakTable <- GC_results$PeakTable[rt_order, ]
  peak_table <- GC_results$PeakTable

  # Count the zeros of each row over the columns 5, 6, ... (one column per
  # data file). drop = FALSE keeps a table even with a single data file.
  value_columns <- seq(5, length.out = length(cdf_files))
  peak_table_values <- peak_table[, value_columns, drop = FALSE]
  peak_table$nb_zeros <- rowSums(peak_table_values == 0)
  zeros_per_l <- sum(peak_table$nb_zeros) / nrow(peak_table)

  # record the table in a file
  write.table(peak_table, file = peak_table_file, sep = "\t",
              row.names = FALSE)

  # record the RData
  file_title <- paste0("RDatas/", title, ".RData")
  # save(GC_results, settings, file = file_title)
  save(GC_results, file = file_title)

  # record the pseudospectra in files (.msp and .tsv)
  file_title <- paste0("Pseudospectra/", title, ".msp")
  write.msp(GC_results$PseudoSpectra, file = file_title, newFile = TRUE)
  file_title <- paste0("Pseudospectra/", title, ".tsv")
  peakspectra_table(GC_results, file_title)

  cat(paste(zeros_per_l, "zeros per line.\n\n"))
}


runGC_vary_parameters_parallel <- function(nb_cores, cdf_files, settings,
                                           vary) {
  # Calculates the number of sets of parameters and runs the analysis on
  # several cores.
  #
  # Args:
  #   nb_cores: maximum number of cores to use.
  #   cdf_files: list of the data files' names.
  #   settings: default settings for runGC() (not used in this function).
  #   vary: list of parameters to vary and the values each parameter must
  #     take; the values of a parameter start at its third element.
  #
  # NOTE(review): titles_to_test and settings_to_test are not arguments and
  # are not defined in this file; they are looked up in the calling
  # environment -- presumably created by the script that sources this file.
  # TODO confirm.

  # calculate the number of possibilities with the parameters' ranges
  nb_possibilities <- 1
  for (param in vary) {
    # Negative indexing also copes with a parameter that has no value at all
    # (3:length(param) would count backwards in that case).
    range_param <- param[-(1:2)]
    nb_possibilities <- nb_possibilities * length(range_param)
  }
  cat("Settings variations:", nb_possibilities, "combinations.\n")

  # create the folders where the results will be saved
  dir.create("RDatas", showWarnings = FALSE) # RData files
  dir.create("Peak_tables", showWarnings = FALSE) # peak tables
  # dir.create("Ions_per_intensity", showWarnings = FALSE) # nb of ions per intensity range
  dir.create("Pseudospectra", showWarnings = FALSE) # pseudospectra

  nb_tests <- length(titles_to_test)
  if (nb_tests == 0) {
    # A cluster cannot be created with zero workers.
    message("No settings set to test.")
    return(invisible(NULL))
  }

  time.start <- Sys.time() # start the timer

  # run the function on several cores, never more than there are tests
  nb_cores <- min(nb_cores, nb_tests)
  cluster <- parallel::makeCluster(nb_cores) # , outfile = "")
  tryCatch(
    {
      # my_runGC() calls peakspectra_table(), which the worker processes do
      # not know unless it is exported to them.
      parallel::clusterExport(cluster, "peakspectra_table",
                              envir = environment())
      parallel::parLapplyLB(cluster, seq_len(nb_tests), my_runGC, cdf_files,
                            titles_to_test, settings_to_test)
      print(titles_to_test)
    },
    # stop the workers even when one of the analyses fails
    finally = parallel::stopCluster(cluster)
  )

  time.end <- Sys.time() # stop the timer
  Tdiff <- difftime(time.end, time.start)
  print(Tdiff)
}


############
#   main   #
############

runGC_vary_parameters_parallel(nb_cores, cdf_files, settings, vary_list)