diff Testtest/GCMS-test_analyze.R @ 0:40de28c7d3fb draft

Uploaded
author melpetera
date Thu, 23 Nov 2017 08:50:14 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Testtest/GCMS-test_analyze.R	Thu Nov 23 08:50:14 2017 -0500
@@ -0,0 +1,149 @@
+# author: Pauline Ribeyre
+
+
+#####################
+# required packages #
+#####################
+
+library("metaMS") # provides "runGC" function
+
+
+############
+# analysis #
+############
+
+peakspectra_table <- function(GC_results, file_title) {
+  # Writes the pseudospectra of an analysis result to one tab-separated file,
+  # with one row per ion.
+  #
+  # Args:
+  #   GC_results: list with a "PseudoSpectra" element; each pseudospectrum is
+  #               read through its Name, rt, rt.sd and pspectrum fields.
+  #   file_title: path of the file to write.
+  #
+  # Output columns: name, rt, rt.sd, mz, maxo. The first three are repeated on
+  # every ion row of the pseudospectrum they belong to.
+
+  per_spectrum <- lapply(GC_results$PseudoSpectra, function(ps) {
+    spectrum <- data.frame(ps$pspectrum)
+    # A pseudospectrum without ions contributes no row. (Looping over
+    # 1:nrow(spectrum) would visit c(1, 0) here and emit an all-NA ion.)
+    if (nrow(spectrum) == 0) {
+      return(NULL)
+    }
+    # whole columns at once instead of growing vectors ion by ion
+    data.frame(name = ps$Name, rt = ps$rt, rt.sd = ps$rt.sd,
+               mz = spectrum$mz, maxo = spectrum$maxo)
+  })
+  # drop the skipped spectra, and the list names so that rbind() only gets
+  # positional arguments
+  per_spectrum <- unname(Filter(Negate(is.null), per_spectrum))
+
+  if (length(per_spectrum) > 0) {
+    df <- do.call(rbind, per_spectrum)
+  } else {
+    # nothing to report: still write a file with the usual header
+    df <- data.frame(name = character(0), rt = numeric(0),
+                     rt.sd = numeric(0), mz = numeric(0), maxo = numeric(0))
+  }
+
+  write.table(df,
+              file = file_title,
+              quote = FALSE,
+              row.names = FALSE,
+              sep = "\t")
+
+}
+
+
+my_runGC <- function(n, cdf_files, titles_to_test, settings_to_test,
+                     nSlaves = 20) {
+  # Runs the data analysis for one settings set and records the results.
+  #
+  # Args:
+  #   n: index of the current test, to select the corresponding title and settings set.
+  #   cdf_files: list of the data files' names.
+  #   titles_to_test: list of titles (one for each settings set) (concatenation of the values taken by the varied parameters).
+  #   settings_to_test: list of settings sets for runGC.
+  #   nSlaves: value forwarded to runGC(nSlaves = ...); defaults to 20.
+  #
+  # Files written (the three folders are not created here and must exist):
+  #   Peak_tables/<title>.tsv, RDatas/<title>.RData,
+  #   Pseudospectra/<title>.msp and Pseudospectra/<title>.tsv.
+  #
+  # Nothing is computed when Peak_tables/<title>.tsv already exists; only
+  # that file is checked, not the other three outputs.
+
+  # attached inside the function so that runGC and write.msp are available
+  # wherever this function is evaluated (it is dispatched to cluster workers)
+  library("metaMS")
+
+  settings <- settings_to_test[[n]]
+  title <- titles_to_test[n]
+
+  print(title)
+
+  # /!\ a long title gives a long file name -> "cannot open connection"
+  peak_table_file <- paste0("Peak_tables/", title, ".tsv")
+  if (file.exists(peak_table_file)) {
+    return(invisible(NULL))
+  }
+
+  # run
+  GC_results <- runGC(files = cdf_files, settings = settings,
+                      returnXset = TRUE, nSlaves = nSlaves)
+
+  # order the result table by retention time
+  GC_results$PeakTable <- GC_results$PeakTable[order(GC_results$PeakTable[, "rt"]), ]
+  peak_table <- GC_results$PeakTable
+
+  # The intensity columns are taken from column 5 onwards, one per data file.
+  # drop = FALSE keeps a table even with a single data file.
+  sample_cols <- 4 + seq_along(cdf_files)
+  peak_table_values <- peak_table[, sample_cols, drop = FALSE]
+  peak_table$nb_zeros <- rowSums(peak_table_values == 0)
+  zeros_per_l <- sum(peak_table$nb_zeros) / nrow(peak_table)
+
+  # record the table in a file
+  write.table(peak_table, file = peak_table_file, sep = "\t", row.names = FALSE)
+
+  # record the RData
+  file_title <- paste0("RDatas/", title, ".RData")
+  save(GC_results, file = file_title)
+
+  # record the pseudospectra in files (.msp and .tsv)
+  file_title <- paste0("Pseudospectra/", title, ".msp")
+  write.msp(GC_results$PseudoSpectra, file = file_title, newFile = TRUE)
+  file_title <- paste0("Pseudospectra/", title, ".tsv")
+  peakspectra_table(GC_results, file_title)
+
+  cat(paste(zeros_per_l, "zeros per line.\n\n"))
+
+}
+
+
+runGC_vary_parameters_parallel <- function(nb_cores, cdf_files, settings, vary) {
+  # Counts the parameter combinations and runs the analysis on several cores.
+  #
+  # Args:
+  #   nb_cores: maximum number of cores to use.
+  #   cdf_files: list of the data files' names.
+  #   settings: default settings for runGC().
+  #   vary: list of parameters to vary and the values each parameter must take.
+  #
+  # Returns (invisibly) the elapsed time as a difftime, or NULL when there is
+  # nothing to run.
+  #
+  # NOTE(review): titles_to_test and settings_to_test are not arguments and are
+  # not built here; they must already be defined outside this function. The
+  # "settings" argument is not used in the body.
+
+  # fail before touching the file system if the two lists are missing
+  if (!exists("titles_to_test") || !exists("settings_to_test")) {
+    stop("titles_to_test and settings_to_test must be defined before calling ",
+         "runGC_vary_parameters_parallel()", call. = FALSE)
+  }
+
+  # calculate the number of possibilities with the parameters' ranges
+  nb_possibilities <- 1
+  for (param in vary) {
+    # the values start at the third element of each entry
+    # NOTE(review): the first two elements presumably identify the parameter
+    # -- confirm against the code that builds "vary"
+    nb_possibilities <- nb_possibilities * max(length(param) - 2, 0)
+  }
+  cat("Settings variations:", nb_possibilities, "combinations.\n")
+
+  dir.create("RDatas", showWarnings = FALSE) # folder where the RDatas will be saved
+  dir.create("Peak_tables", showWarnings = FALSE) # folder where the peak tables will be saved
+  dir.create("Pseudospectra", showWarnings = FALSE) # folder where the pseudospectra will be saved
+
+  nb_tests <- length(titles_to_test)
+  if (nb_tests == 0) {
+    # a cluster cannot be created with zero workers
+    message("No settings set to test.")
+    return(invisible(NULL))
+  }
+
+  time.start <- Sys.time() # start the timer
+
+  # run the function on several cores, never more than there are tests
+  nb_cores <- min(nb_cores, nb_tests)
+  cluster <- parallel::makeCluster(nb_cores)
+  # stop the workers even when one of the runs fails
+  on.exit(parallel::stopCluster(cluster), add = TRUE)
+  # my_runGC calls peakspectra_table on the workers, which start without the
+  # functions defined in this script
+  parallel::clusterExport(cluster, "peakspectra_table",
+                          envir = environment(my_runGC))
+  parallel::parLapplyLB(cluster, seq_len(nb_tests), my_runGC,
+                        cdf_files, titles_to_test, settings_to_test)
+  print(titles_to_test)
+
+  time.end <- Sys.time() # stop the timer
+  Tdiff <- difftime(time.end, time.start)
+  print(Tdiff)
+
+}
+
+
+############
+#   main   #
+############
+
+# Entry point of the script. nb_cores, cdf_files, settings and vary_list are
+# not defined in this file and must exist before the script is run.
+runGC_vary_parameters_parallel(
+  nb_cores = nb_cores,
+  cdf_files = cdf_files,
+  settings = settings,
+  vary = vary_list
+)
+