# HG changeset patch
# User melpetera
# Date 1511445014 18000
# Node ID 40de28c7d3fbbf9f72e08ec90ecd8087bbcd4bcc

Uploaded

diff -r 000000000000 -r 40de28c7d3fb Testtest/GCMS-test.R
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Testtest/GCMS-test.R	Thu Nov 23 08:50:14 2017 -0500
@@ -0,0 +1,220 @@
+# author: Pauline Ribeyre
+
+
+#####################
+# required packages #
+#####################
+
+library(batch) # provides "parseCommandargv" function
+
+
+############
+#   init   #
+############
+
+# read the parameters
+argv_list <- parseCommandArgs(evaluate = FALSE) #, trailingOnly = TRUE)
+argv <- unlist(argv_list)
+
+# common environment to share variables between files
+env <- new.env()
+
+# timestamp made unique by adding a random element
+env$timestamp <- round(as.numeric(Sys.time()) + runif(1, 0, 100))
+
+# set working directory
+tmpwd <- paste0("/galaxy/data/", env$timestamp)
+# system("chmod -R 0777 .")
+# dir.create(tmpwd, mode = "0777")
+dir.create(tmpwd)
+setwd(tmpwd)
+
+# readme file
+readme_msg <- paste0("\"", env$timestamp, "\"",
+                     " is a temporary folder storing data for an instance of Galaxy tool \"GCMS-test\" launched on ",
+                    Sys.time(), ". ",
+                    "This folder should be deleted automatically at the end of the tool's execution. ",
+                    "If the instance ended or has been aborted and this folder still exists, it can be deleted manually.")
+write(readme_msg, file = "README.txt")
+
+################
+# source files #
+################
+
+source_local <- function(fname){
+  # Gets a file's full name in Galaxy's directories.
+  #
+  # Args:
+  #   fname: the name of the file.
+  #
+  # Returns:
+  #   The full name of the file.
+
+  argv <- commandArgs(trailingOnly = FALSE)
+  base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
+  return (paste(base_dir, fname, sep="/"))
+
+}
+
+env$source_settings <- source_local("GCMS-test_settings.R")
+env$source_analyze <- source_local("GCMS-test_analyze.R")
+env$source_output <- source_local("GCMS-test_output.R")
+env$source_spectrum <- source_local("GCMS-test_spectrum.R")
+
+env$source_file_names <- "file_names.txt"
+
+
+#############
+# functions #
+#############
+
+args_to_vary_list <- function() {
+  # Creates the list of settings variations using the arguments.
+  #
+  # Returns:
+  #   The list of settings variations.
+
+  param <- c("step", "steps", "mzdiff", "fwhm", "simthresh", "snthresh", "max",
+             "minclassfraction", "minclasssize", "rtdiff") #, "minfeat")
+
+  PeakPicking_param <- c("step", "steps", "mzdiff", "fwhm", "snthresh", "max")
+  betweenSamples_param <- c("min.class.fraction", "min.class.size", "rtdiff", "simthresh")
+
+  vary <- list()
+
+  for (p in param) {
+
+    min <- as.numeric(argv[[paste0(p, "_min")]])
+    max <- as.numeric(argv[[paste0(p, "_max")]])
+    step <- as.numeric(argv[[paste0(p, "_step")]])
+
+    if (p == "minclassfraction")
+      p <- "min.class.fraction"
+    else if (p == "minclasssize")
+      p <- "min.class.size"
+
+    if (p %in% PeakPicking_param)
+      name <- paste0("PeakPicking$", p)
+    else if (p %in% betweenSamples_param)
+      name <- paste0("betweenSamples.", p)
+    else
+      stop("\"", p, "\" is not a valid parameter to vary.")
+
+    # prevent seq error
+    if (min > max)
+      max <- min
+
+    range <- seq(min, max, step)
+    vary[[length(vary) + 1]] <- c(p, name, range)
+
+  }
+
+  return (vary)
+
+}
+
+
+read_ions_to_check <- function() {
+  # Reads the arguments concerning the ions to check.
+  
+  env$ions_name <- list()
+  env$ions_rt <- list()
+  env$ions_mzs <- list()
+  
+  env$nb_ions_to_check <- length(grep("ions_name_", names(argv)))
+  
+  if (env$nb_ions_to_check > 0) {
+    for (i in 1:env$nb_ions_to_check) {
+      env$ions_name[length(env$ions_name) + 1] <- argv[[paste0("ions_name_", i)]]
+      env$ions_rt[length(env$ions_rt) + 1] <- as.numeric(argv[[paste0("ions_rt_", i)]])
+      mzs <- argv[[paste0("ions_mzs_", i)]]
+      mzs <- as.numeric(strsplit(mzs, ",")[[1]])
+      env$ions_mzs[length(env$ions_mzs) + 1] <- list(mzs)
+    }
+  }
+  
+}
+
+
+############
+#  inputs  #
+############
+
+env$nb_cores <- as.numeric(argv[["nb_cores"]])
+cat("Nb cores:", env$nb_cores, "\n")
+
+# get data from zip file
+if (!is.null(argv[["zip_file"]])) {
+  env$cdf_files <- unzip(argv[["zip_file"]])
+} else {
+  stop("No files have been provided.")
+}
+
+# vary parameters
+env$vary_list <- args_to_vary_list()
+
+# count duplicate ions
+env$count_duplicates <- argv[["count_duplicates"]] == "true"
+if (env$count_duplicates) {
+  
+  # delta rt
+  env$duplicates_delta_rt <- as.numeric(argv[["duplicates_delta_rt"]])
+  
+  # delta mz
+  env$duplicates_delta_mz <- as.numeric(argv[["duplicates_delta_mz"]])
+  
+}
+
+# check the presence of ions
+env$nb_ions_to_check <- 0
+env$check_ions <- argv[["check_ions"]] == "true"
+if (env$check_ions) {
+  
+  read_ions_to_check()
+  
+  # delta rt
+  env$ions_delta_rt <- as.numeric(argv[["ions_delta_rt"]])
+  
+  # delta mz
+  env$ions_delta_mz <- as.numeric(argv[["ions_delta_mz"]])
+  
+}
+
+
+#############
+#  outputs  #
+#############
+
+env$summary_out <- argv[["summary_out"]]
+env$peakspectra_out <- argv[["peakspectra_out"]]
+env$intensity_graph_out <- argv[["intensity_graph_out"]]
+
+if (env$count_duplicates)
+  env$count_duplicates_out <- argv[["duplicates_out"]]
+
+env$compare_graph_out <- argv[["compare_graph_out"]]
+
+
+##########
+#  run   #
+##########
+
+cat("---------- Settings ----------\n")
+sys.source(env$source_settings, env)
+
+
+cat("---------- Analysis ----------\n")
+sys.source(env$source_analyze, env)
+
+
+cat("----------  Output  ----------\n")
+sys.source(env$source_output, env)
+
+
+##########
+#  end   #
+##########
+
+# delete temporary files
+setwd("..")
+unlink(tmpwd, recursive = TRUE)
diff -r 000000000000 -r 40de28c7d3fb Testtest/GCMS-test.xml
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Testtest/GCMS-test.xml	Thu Nov 23 08:50:14 2017 -0500
@@ -0,0 +1,263 @@
+<tool id="GCMSt" name="GCMS-test" version="17.11">
+	<description>Test parameters for GC-MS data preprocessing using metaMS package</description>
+    
+    <requirements>
+        <requirement type="package" version="1.8.0">bioconductor-metams</requirement>
+        <requirement type="package" version="1.46.0">bioconductor-xcms</requirement>
+        <requirement type="package" version="1.1_4">r-batch</requirement>
+    </requirements>
+    
+    <stdio>
+        <exit_code range="1:" level="fatal" />
+    </stdio>
+
+	<command>
+		Rscript $__tool_directory__/GCMS-test.R
+		
+		<!-- Inputs -->
+		
+		<!-- nb_cores \${GALAXY_SLOTS:-1} -->
+		nb_cores \${GALAXY_SLOTS}
+		
+		zip_file $zip_file
+		
+		step_min $step.step_min
+		step_max $step.step_max
+		step_step $step.step_step
+		
+		steps_min $steps.steps_min
+		steps_max $steps.steps_max
+		steps_step $steps.steps_step
+		
+		mzdiff_min $mzdiff.mzdiff_min
+		mzdiff_max $mzdiff.mzdiff_max
+		mzdiff_step $mzdiff.mzdiff_step
+		
+		fwhm_min $fwhm.fwhm_min
+		fwhm_max $fwhm.fwhm_max
+		fwhm_step $fwhm.fwhm_step
+		
+		simthresh_min $simthresh.simthresh_min
+		simthresh_max $simthresh.simthresh_max
+		simthresh_step $simthresh.simthresh_step
+		
+		snthresh_min $snthresh.snthresh_min
+		snthresh_max $snthresh.snthresh_max
+		snthresh_step $snthresh.snthresh_step
+		
+		max_min $max.max_min
+		max_max $max.max_max
+		max_step $max.max_step
+		
+		minclassfraction_min $minclassfraction.minclassfraction_min
+		minclassfraction_max $minclassfraction.minclassfraction_max
+		minclassfraction_step $minclassfraction.minclassfraction_step
+		
+		minclasssize_min $minclasssize.minclasssize_min
+		minclasssize_max $minclasssize.minclasssize_max
+		minclasssize_step $minclasssize.minclasssize_step
+		
+		rtdiff_min $rtdiff.rtdiff_min
+		rtdiff_max $rtdiff.rtdiff_max
+		rtdiff_step $rtdiff.rtdiff_step
+		
+		<!--minfeat_min $minfeat.minfeat_min
+		minfeat_max $minfeat.minfeat_max
+		minfeat_step $minfeat.minfeat_step-->
+		
+		count_duplicates $outputs_options.count_duplicates.count_duplicates_bool
+		#if $outputs_options.count_duplicates.count_duplicates_bool:
+			duplicates_delta_rt $outputs_options.count_duplicates.duplicates_delta_rt
+			duplicates_delta_mz $outputs_options.count_duplicates.duplicates_delta_mz
+		#end if
+		
+		check_ions $outputs_options.check_ions.check_ions_bool
+		#if $outputs_options.check_ions.check_ions_bool:
+			ions_delta_rt $outputs_options.check_ions.ions_delta_rt
+			ions_delta_mz $outputs_options.check_ions.ions_delta_mz
+			
+			#for $i, $s in enumerate($outputs_options.check_ions.ions_to_check)
+				ions_name_${i+1} $s.name
+				ions_rt_${i+1} $s.rt
+				ions_mzs_${i+1} $s.mzs
+			#end for
+		#end if
+		
+		<!-- Outputs -->
+		
+		summary_out $summary_out
+		peakspectra_out $peakspectra_out
+		intensity_graph_out $intensity_graph_out
+		
+		#if $outputs_options.count_duplicates.count_duplicates_bool:
+			duplicates_out $duplicates_out
+		#end if
+		
+		compare_graph_out $compare_graph_out
+	</command>
+  
+	<inputs>
+		<param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file" help="Zip file containing the chromatograms to analyse" />
+		
+		<section name="fwhm" title="FWHM (default: 6)" help="The full width at half maximum of the peaks" >
+			<param name="fwhm_min" value="6" type="float" label="From" />
+			<param name="fwhm_max" value="6" type="float" label="To" />
+			<param name="fwhm_step" value="1" type="float" label="Step" />
+		</section>
+		
+		<section name="max" title="Max (default: 500)" help="Maximum number of peaks per extracted ion chromatogram" >
+			<param name="max_min" value="500" type="integer" label="From" />
+			<param name="max_max" value="500" type="integer" label="To" />
+			<param name="max_step" value="1" type="integer" label="Step" />
+		</section>
+		
+		<section name="minclassfraction" title="Minimum class fraction (default: 0.5)" help="The fraction of samples in which a pseudospectrum is present before it is regarded as an unknown" >
+			<param name="minclassfraction_min" value="0.5" type="float" label="From" />
+			<param name="minclassfraction_max" value="0.5" type="float" label="To" />
+			<param name="minclassfraction_step" value="0.5" type="float" label="Step" />
+		</section>
+		
+		<section name="minclasssize" title="Minimum class size (default: 5)" help="The absolute number of samples in which a pseudospectrum is present before it is regarded as an unknown" >
+			<param name="minclasssize_min" value="5" type="integer" label="From" />
+			<param name="minclasssize_max" value="5" type="integer" label="To" />
+			<param name="minclasssize_step" value="1" type="integer" label="Step" />
+		</section>
+		
+		<!-- <section name="minfeat" title="minfeat (default: 0.05)" help="The minimum number of ion in a mass spectra to consider it a molecule" >
+			<param name="minfeat_min" value="0.05" type="float" label="From" />
+			<param name="minfeat_max" value="0.05" type="float" label="To" />
+			<param name="minfeat_step" value="0.01" type="float" label="Step" />
+		</section> -->
+		
+		<section name="mzdiff" title="m/z diff (default: 0.05)" help="Minimum difference in m/z for peaks with overlapping retention times" >
+			<param name="mzdiff_min" value="0.05" type="float" label="From" />
+			<param name="mzdiff_max" value="0.05" type="float" label="To" />
+			<param name="mzdiff_step" value="0.01" type="float" label="Step" />
+		</section>
+		
+		<section name="rtdiff" title="RT diff (default: 0.05)" help="The allowed RT shift between same molecules in different samples" >
+			<param name="rtdiff_min" value="0.05" type="float" label="From" />
+			<param name="rtdiff_max" value="0.05" type="float" label="To" />
+			<param name="rtdiff_step" value="0.01" type="float" label="Step" />
+		</section>
+		
+		<section name="simthresh" title="Similarity threshold (default: 0.7)" help="The minimum similarity allowed between peaks' mass spectra to be considered as equal" >
+			<param name="simthresh_min" value="0.7" type="float" label="From" />
+			<param name="simthresh_max" value="0.7" type="float" label="To" />
+			<param name="simthresh_step" value="0.1" type="float" label="Step" />
+		</section>
+		
+		<section name="snthresh" title="s/n threshold (default: 2)" help="Signal to noise ratio cutoff" >
+			<param name="snthresh_min" value="2" type="integer" label="From" />
+			<param name="snthresh_max" value="2" type="integer" label="To" />
+			<param name="snthresh_step" value="1" type="integer" label="Step" />
+		</section>
+		
+		<section name="step" title="Step (default: 0.05)" help="Step size to use for profile generation" >
+			<param name="step_min" value="0.05" type="float" label="From" />
+			<param name="step_max" value="0.05" type="float" label="To" />
+			<param name="step_step" value="0.01" type="float" label="Step" />
+		</section>
+		
+		<section name="steps" title="Steps (default: 2)" help="Number of steps to merge prior to filtration" >
+			<param name="steps_min" value="2" type="integer" label="From" />
+			<param name="steps_max" value="2" type="integer" label="To" />
+			<param name="steps_step" value="1" type="integer" label="Step" />
+		</section>
+		
+		<section name="outputs_options" title="Output options" expanded="True" >
+			<conditional name="count_duplicates" >
+				<param name="count_duplicates_bool" type="boolean" checked="false" label="Count the duplicates?" help="Counting the number of duplicates for each test slows the process down" />
+				<when value="true" >
+					<param name="duplicates_delta_rt" value="0.1" type="float" label="Delta rt" help="Maximum difference in two ions' m/z to consider them as equal" />
+					<param name="duplicates_delta_mz" value="0.1" type="float" label="Delta m/z" help="Maximum difference in two ions' rt to consider them as equal" />
+				</when>
+				<when value="false" />
+			</conditional>
+			
+			<conditional name="check_ions" >
+				<param name="check_ions_bool" type="boolean" checked="false" label="Check the presence of ions?" help="Checking the presence of ions for each test slows the process down" />
+				<when value="true" >
+					<param name="ions_delta_rt" value="0.1" type="float" label="Delta rt" help="Maximum difference in two ions' m/z to consider them as equal" />
+					<param name="ions_delta_mz" value="0.5" type="float" label="Delta m/z" help="Maximum difference in two ions' rt to consider them as equal" />
+					<repeat name="ions_to_check" title="Check ion" min="1" help="Add as many ions as necessary" >
+						<param name="name" value="Valine" type="text" label="Name" help="Name to be displayed. Ex: Valine" />
+						<param name="rt" value="9.67" type="float" label="Retention time" help="The ion's retention time. Ex: 9.67" />
+						<param name="mzs" value="218.1105,219.1146" type="text" label="Mass-to-charge ratio for each isotope" help="List of the isotopes' m/zs separated by commas, without spaces. Ex: 218.1105,219.1146"/>
+					</repeat>
+				</when>
+				<when value="false" />
+			</conditional>
+		</section>
+	</inputs>
+  
+	<outputs>
+		<data name="summary_out" label="summary.tsv" format="tabular" />
+		<data name="peakspectra_out" label="peakspectra.zip" format="zip" />
+		<data name="intensity_graph_out" label="intensity_graph.pdf" format="pdf" />
+		<data name="duplicates_out" label="duplicates.zip" format="zip" >
+			<filter>outputs_options['count_duplicates']['count_duplicates_bool']</filter>
+		</data>
+		<data name="compare_graph_out" label="compare_graph.pdf" format="pdf" />
+    </outputs>
+	
+	<help>
+
+**Author** Pauline Ribeyre
+
+=========
+GCMS-test
+=========
+
+-----------
+Description
+-----------
+
+This tool allows the user to run the method metaMS.runGC() with several sets of parameters and to compare the results of each run.
+
+-----------------
+Workflow position
+-----------------
+
+**Upstream tools**
+
+You must start from here.
+
+**Downstream tools**
+
++---------------------------+---------------------------------------+--------+
+| Name                      | Output file                           | Format |
++===========================+=======================================+========+
+|                           |                                       |        |
++---------------------------+---------------------------------------+--------+
+
+----------
+Parameters
+----------
+
+The **zip file** contains the data (the chromatograms to analyse with metaMS.runGC).
+
+Each section allows the user to vary one parameter by choosing a start value, a stop value and a step for this parameter.
+
+The last section allows the user to set output preferences.
+
+-------
+Outputs
+-------
+
+The output file **summary.tsv** is a tabular file. It contains the summary of the tests.
+
+The output file **peakspectra.zip** is a zip file. It contains the .msp and .tsv peakspectra files for each test.
+
+The output file **intensity_graph.zip** is a pdf file. It contains the "number of ions vs intensity range" graph for each test.
+
+The output file **duplicates.zip** is a zip file. It contains the .tsv duplicates file for each test.
+
+---------
+Changelog
+---------
+
+**Version 1.0 - 21/07/2017**
+
+	</help>
+</tool>
\ No newline at end of file
diff -r 000000000000 -r 40de28c7d3fb Testtest/GCMS-test_analyze.R
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Testtest/GCMS-test_analyze.R	Thu Nov 23 08:50:14 2017 -0500
@@ -0,0 +1,149 @@
+# author: Pauline Ribeyre
+
+
+#####################
+# required packages #
+#####################
+
+library("metaMS") # provides "runGC" function
+
+
+############
+# analysis #
+############
+
+peakspectra_table <- function(GC_results, file_title) {
+  # Saves the pseudospectra in 2 files (text and tabular formats).
+  
+  names <- c()
+  rts <- c()
+  rt.sds <- c()
+  mzs_df <- list()
+  for (ps in GC_results$PseudoSpectra) {
+    
+    name <- ps$Name
+    rt <- ps$rt
+    rt.sd <- ps$rt.sd
+    
+    names <- c(names, name)
+    rts <- c(rts, rt)
+    rt.sds <- c(rt.sds, rt.sd)
+    
+    spectrum <- data.frame(ps$pspectrum)
+    mz <- c()
+    maxo <- c()
+    for (i in 1:nrow(spectrum)) {
+      ion <- spectrum[i,]
+      mz <- c(mz, ion$mz)
+      maxo <- c(maxo, ion$maxo)
+    }
+    df <- data.frame(name, rt, rt.sd, mz, maxo)
+    mzs_df[[length(mzs_df) + 1]] <- df
+    
+  }
+  
+  df = do.call(rbind, mzs_df)
+  
+  write.table(df,
+              file = file_title,
+              quote = FALSE,
+              row.names = FALSE,
+              sep = "\t")
+  
+}
+
+
+my_runGC <- function(n, cdf_files, titles_to_test, settings_to_test) {
+  # Runs the data analysis and records the results.
+  #
+  # Args:
+  #   n: index of the current test, to select the corresponding title and settings set.
+  #   cdf_files: list of the data files' names.
+  #   titles_to_test:  list of titles (one for each settings set) (concatenation of the values taken by the varied parameters).
+  #   settings_to_test: list of settings sets for runGC.
+  
+  library("metaMS")
+  
+  settings <- settings_to_test[n][[1]]
+  title <- titles_to_test[n]
+  
+  print(title)
+  
+  if (!file.exists(paste0("Peak_tables/", title, ".tsv"))) {
+
+    # run
+    GC_results <- runGC(files = cdf_files, settings = settings, returnXset = TRUE, nSlaves = 20)
+
+    # order the result table by retention time
+    peak_table <- GC_results$PeakTable <- GC_results$PeakTable[order(GC_results$PeakTable[,"rt"]),]
+    peak_table_values <- peak_table[,5:(5 + length(cdf_files) - 1)]
+    peak_table$nb_zeros <- apply(peak_table_values, 1, function(x) sum(x == 0))
+    zeros_per_l <- sum(peak_table$nb_zeros)/nrow(peak_table)
+
+    # record the table in a file
+    file_title <- paste0("Peak_tables/", title, ".tsv")
+    write.table(peak_table, file = file_title, sep = "\t", row.names = FALSE) # /!\ title length -> cannot open connexion
+    
+    # record the RData
+    file_title <- paste0("RDatas/", title, ".RData")
+    # save(GC_results, settings, file = file_title)
+    save(GC_results, file = file_title)
+    
+    # record the pseudospectra in files (.msp and .tsv)
+    file_title <- paste0("Pseudospectra/", title, ".msp")
+    write.msp(GC_results$PseudoSpectra, file = file_title, newFile = TRUE)
+    file_title <- paste0("Pseudospectra/", title, ".tsv")
+    peakspectra_table(GC_results, file_title)
+
+    cat(paste(zeros_per_l, "zeros per line.\n\n"))
+
+  } # end if
+  
+}
+
+
+runGC_vary_parameters_parallel <- function(nb_cores, cdf_files, settings, vary) {
+  # Calculates the number of sets of paramaters and runs the analysis on several cores.
+  #
+  # Args:
+  #   nb_cores: maximum number of cores to use.
+  #   cdf_files: list of the data files' names.
+  #   settings: default settings for runGC().
+  #   vary: list of parameters to vary and the values each parameter must take.
+  
+  # calculate the number of possibilities with the parameters' ranges
+  nb_possibilites <- 1
+  for (param in vary) {
+    range_param <- param[3:length(param)]
+    nb_possibilites <- nb_possibilites * length(range_param)
+  }
+  cat("Settings variations:", nb_possibilites, "combinations.\n")
+  
+  dir.create("RDatas", showWarnings = FALSE) #create the folder where the RDatas will be saved
+  dir.create("Peak_tables", showWarnings = FALSE) #create the folder where the peak tables will be saved
+  # dir.create("Ions_per_intensity", showWarnings = FALSE) #create the folder where the nb of ions per intensity range will be saved
+  dir.create("Pseudospectra", showWarnings = FALSE) #create the folder where the pseudospectra will be saved
+
+  time.start <- Sys.time() # start the timer
+
+  # run the function on several cores
+  if (length(titles_to_test) < nb_cores)
+    nb_cores <- length(titles_to_test)
+  cluster <- makeCluster(nb_cores)#, outfile = "")
+  parLapplyLB(cluster, 1:length(titles_to_test), my_runGC, cdf_files, titles_to_test, settings_to_test)
+  print(titles_to_test)
+  stopCluster(cluster)
+
+  time.end <- Sys.time() # stop the timer
+  Tdiff <- difftime(time.end, time.start)
+  print(Tdiff)
+  
+}
+
+
+############
+#   main   #
+############
+
+runGC_vary_parameters_parallel(nb_cores, cdf_files, settings, vary_list)
+
diff -r 000000000000 -r 40de28c7d3fb Testtest/GCMS-test_output.R
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Testtest/GCMS-test_output.R	Thu Nov 23 08:50:14 2017 -0500
@@ -0,0 +1,450 @@
+# author: Pauline Ribeyre
+
+
+#####################
+# required packages #
+#####################
+
+library("parallel") # provides cluster methods and "parLapplyLB" function
+library("grDevices")  # provides jpeg handling methods
+
+
+############
+#   data   #
+############
+
+update_list_of_file_names <- function() {
+  # Writes in a file the list of file names using the files created previously in ./RDatas.
+  
+  file.create(source_file_names, showWarnings = FALSE) # erase the file
+  directory <- "RDatas"
+  files <- list.files(directory, full.names = TRUE)
+  
+  for (f in files) {
+    f <- substr(f, 8, nchar(f) - 6)
+    write(f, file = source_file_names, append = TRUE)
+  }
+  
+}
+
+
+titles_to_columns <- function(indicators) {
+  # Parse the "title" column of the indicator dataframe to separate the different parameters and their values.
+  #
+  # Args:
+  #   indicators: dataframe (one row per test) containing the results.
+  #
+  # Returns:
+  #   new_indicators: copy of "indicators" with one column added for each parameter that varied.
+  
+  default_settings <- FALSE
+  
+  # parse the title
+  param_names <- c()
+  for (title in indicators$title) {
+    
+    if (title == "default_settings") {
+      default_settings <- TRUE
+      break
+    }
+    
+    else {
+      params <- strsplit(title, "_")[[1]]
+      for (param in params) {
+        name <- strsplit(param, "=")[[1]][1]
+        
+        # some lowercase values create errors because they are primitive functions
+        substr(name, 1, 1) <- toupper(substr(name, 1, 1))
+        
+        if (!name %in% param_names)
+          param_names <- c(param_names, name)
+        value <- strsplit(param, "=")[[1]][2]
+        if (!exists(name))
+          assign(name, c())
+        assign(name, c(get(name), value))
+      }
+    }
+    
+  }
+  
+  new_indicators <- indicators
+  
+  # add the columns to the dataframe
+  if (!default_settings) {
+    for (name in param_names) {
+      new_indicators <- cbind(new_indicators, get(name))
+      names(new_indicators)[ncol(new_indicators)] <- name
+    }
+    
+    # order the dataframe's columns
+    order <- c(1, (ncol(new_indicators) - length(param_names) + 1) : ncol(new_indicators), 2 : (ncol(new_indicators) - length(param_names)))
+    new_indicators <- new_indicators[, order]
+  }
+  
+  return (new_indicators)
+  
+}
+
+
+create_summary <- function(nb_cores, count_duplicates = FALSE) {
+  # Reads the files created by runGC_vary_parameters() and calculates the quality criteria for each test.
+  #
+  # Args:
+  #   nb_cores: maximum number of cores to use.
+  #   count_duplicates: calculate the number of count_duplicates obtained by each test (slow).
+  #
+  # Returns:
+  #   A dataframe (one row per test) containing the results:
+  #     title, nb ions, nb zeros/line, nb ions/intensity range, presence of valine, nb count_duplicates (opt).
+# 
+#   # Returns:
+#   #   A list containing:
+#   #   1: dataframe (one row per test) containing the results:
+#   #     title, nb ions, nb zeros/line, nb ions/intensity range, presence of valine, nb count_duplicates (opt).
+#   #   2: list of the settings sets for each test.
+  
+  time.start <- Sys.time() # start the timer
+  
+  file_names <- readLines(source_file_names)
+  
+  dir.create("count_duplicates", showWarnings = FALSE) #create the folder where the details of the count_duplicates will be saved
+  
+  # summ <- create_summary_parallel(1, file_names)
+  # cat("indic:\n",summary[1][[1]],"\n")
+  
+  # run the function on several cores
+  if (length(file_names) != 0) {
+    if (length(file_names) < nb_cores)
+      nb_cores <- length(file_names)
+    cluster <- makeCluster(nb_cores) #, outfile = "")
+    summaries <- parLapplyLB(cluster, 1:length(file_names), create_summary_parallel, file_names = file_names, count_duplicates = count_duplicates) #, pb = pb)
+    stopCluster(cluster)
+  }
+  else
+    stop("There are no files to generate the output from.")
+  
+  # concatenate the results obtained by all the cores
+  indicators <- NULL
+  for (summary in summaries)
+    indicators <- rbind(indicators, t(summary))
+  # settings_list <- c()
+  # for (summary in summaries) {
+  #   settings <- summary[2][[1]]
+  #   indic <- summary[1][[1]]
+  # 
+  #   indicators <- rbind(indicators, t(indic))
+  #   settings_list <- c(settings_list, settings)
+  # }
+  indicators <- data.frame(indicators)
+
+  col_names <- c("title", "nb_pseudospectra", "zeros_per_line", "f0to10E3", "f10E3to10E4", "f10E4to10E5",
+                 "f10E5to10E6", "f10E6to10E7", "f10E7")
+  
+  if (check_ions)
+    col_names <- c(col_names, ions_name)
+  
+  names(indicators)[1:length(col_names)] <- col_names
+  if (count_duplicates)
+    names(indicators)[(ncol(indicators) - 1):ncol(indicators)] <- c("count_duplicates", "nb_ions")
+  
+  indicators <- titles_to_columns(indicators)
+
+  time.end <- Sys.time() # stop the timer
+  Tdiff <- difftime(time.end, time.start)
+  print(Tdiff)
+  
+  # return (list(indicators, settings_list))
+  return (indicators)
+  
+}
+
+
+create_summary_parallel <- function(n, file_names, count_duplicates = FALSE) { #}, pb) {
+  # Reads the files created by runGC_vary_parameters() and calculates the quality criteria for each test.
+  #
+  # Args:
+  #   n: index of the current test, to select the corresponding title.
+  #   file_names:  list of titles (one for each test) (concatenation of the values taken by the varied parameters).
+  #   nb_cores: maximum number of cores to use.
+  #   count_duplicates: calculate the number of count_duplicates obtained by each test (slow).
+  #
+  # Returns:
+  #   A dataframe (one row per test) containing the results:
+  #     title, nb ions, nb zeros/line, nb ions/intensity range, presence of valine.
+  # 
+  # # Returns:
+  # #   A list containing:
+  # #   1: dataframe (one row per test) containing the results:
+  # #     title, nb ions, nb zeros/line, nb ions/intensity range, presence of valine.
+  # #   2: list of the settings sets for each test.
+  
+  source(source_spectrum, environment())
+  
+  intensities_x <- c(1000, 10000, 100000, 1000000, 10000000)
+  
+  this_title <- file_names[n]
+  
+  # calculate the number of zeros
+  file_title <- paste0("Peak_tables/", this_title, ".tsv")
+  peak_table <- read.table(file_title, sep="\t", header=TRUE)
+  peak_table_values <- peak_table[,5:ncol(peak_table)]
+  zeros <- sum(peak_table$nb_zeros)
+  nb_lines <- nrow(peak_table)
+  zeros_per_line <- round(zeros/nb_lines, 4)
+  
+  # count the number of ions by intensity range
+  intensities_y <- c()
+  intensities_y[1] <- length(rowMeans(
+    peak_table_values, na.rm = TRUE)[rowMeans(peak_table_values, na.rm = TRUE) < intensities_x[1]])
+  for (i in 2:length(intensities_x))
+    intensities_y[i] <- length(rowMeans(
+      peak_table_values, na.rm = TRUE)[rowMeans(peak_table_values, na.rm = TRUE) < intensities_x[i] &
+                                       rowMeans(peak_table_values, na.rm = TRUE) > intensities_x[i - 1]])
+  intensities_y[i + 1] <- length(rowMeans(
+    peak_table_values, na.rm = TRUE)[rowMeans(peak_table_values, na.rm = TRUE) > intensities_x[i]])
+  
+  # load the settings
+  file_title <- paste0("RDatas/", this_title, ".RData")
+  load(file_title)
+
+  # count the number of count_duplicates and record in a file
+  if (count_duplicates) {
+    file_title <- paste0("count_duplicates/", this_title, ".tsv")
+    nb_count_duplicates <- data.frame(count_duplicates_function(GC_results))
+    names(nb_count_duplicates) <- c("mz_min", "mz_max", "rt", "count_duplicates")
+    write.table(nb_count_duplicates, file = file_title, sep = "\t", row.names = FALSE)
+    count_duplicates <- nrow(nb_count_duplicates)
+    
+    nb_ions <- 0
+    pseudospectra <- GC_results$PseudoSpectra
+    for (ps in pseudospectra) {
+      nb_ions <- nb_ions + nrow(ps$pspectrum)
+    }
+  }
+  
+  summary <- c(this_title, nb_lines, zeros_per_line, intensities_y)
+  
+  # check the presence of ions
+  if (nb_ions_to_check > 0) {
+    for (i in 1:nb_ions_to_check) {
+      name <- ions_name[[i]]
+      rt <- ions_rt[[i]]
+      mzs <- ions_mzs[[i]]
+      cat("Check:", name, rt, mzs, "\n")
+      
+      value <- is_ion_present(GC_results, rt = rt, mz_list = mzs)
+      assign(name, value)
+      summary <- c(summary, get(name))
+    }
+  }
+  
+  # check the presence of ion valine 12C and 13C
+  # valine <- is_ion_present(GC_results, rt = 9.67, mz_list = list(218.1105, 219.1146))
+
+  # summary <- c(summary, valine)
+  
+  if (count_duplicates)
+    summary <- c(summary, count_duplicates, nb_ions)
+
+  # return (list(summary, settings))
+  return (summary)
+  
+}
+
+
+############
+#  graphs  #
+############
+
+ions_per_intensity <- function(indicators) {
+  # For each test, plots the number of ions for each range of intensity.
+  #
+  # Args:
+  #   indicators: dataframe (one row per test) containing the results. 
+  
+  pdf(intensity_graph_out)
+  par(mar = c(5.1,4.1,5,2.1))
+  
+  for (i in 1:nrow(indicators)) {
+    
+    indic <- indicators[i,]
+    
+    names <- c("f0to10E3", "f10E3to10E4", "f10E4to10E5", "f10E5to10E6", "f10E6to10E7", "f10E7")
+    intensities_y <- c()
+    
+    for (colname in names) {
+      intensities_y <- c(intensities_y, indic[colname])
+    }
+    intensities_y <- unlist(intensities_y)
+    intensities_y <- as.numeric(levels(intensities_y))[intensities_y]
+    
+    title <- as.character(indic$title)
+    title <- strsplit(title, "_")[[1]]
+    plot_title <- ""
+    for (i in 1:length(title)) {
+      plot_title <- paste(plot_title, title[[i]])
+      if (i %% 2 == 0)
+        plot_title <- paste(plot_title, "\n")
+      else
+        plot_title <- paste(plot_title, " ")
+    }
+    
+    barplot(intensities_y, names.arg = names, xlab = "intensity", ylab = "number of ions",
+            main = plot_title, cex.main = 0.8)
+  
+  }
+  
+  dev.off()
+  
+}
+
+
+graph_results <- function(indicators, criteria) {
+  # Plots the results.
+  #
+  # Args:
+  #   indicators: dataframe (one row per test) containing the results.
+  
+  first_criteria <- grep("nb_pseudospectra", names(indicators))
+  nb_params <- first_criteria - 2
+  this_criteria <- grep(criteria, names(indicators))
+  
+  # values taken by each parameter
+  values <- list()
+  for (i in 2:(1 + nb_params)) {
+    lev <- unique(indicators[,i])
+    values[[i - 1]] <- lev
+  }
+  
+  length <- lapply(values, length)
+  
+  # indexes of the parameters taking the most values
+  longest_1 <- which.max(length); length[longest_1] <- -1
+  longest_2 <- which.max(length); length[longest_2] <- -1
+  longest_3 <- which.max(length); length[longest_3] <- -1
+  
+  # indexes of the other parameters
+  shortest <- which(length != -1)
+  
+  # all combinations of the values taken by these parameters
+  combinations <- list()
+  for (s in shortest) {
+    page <- indicators[,s + 1]
+    combinations[[length(combinations) + 1]] <- sort(as.numeric(as.character(unique(page))))
+  }
+  names(combinations) <- names(indicators)[shortest + 1]
+  combinations <- expand.grid(combinations)
+  
+  # save the plots in a pdf file
+  pdf(compare_graph_out)
+  nb_rows <- length(values[longest_3][[1]])
+  nb_cols <- length(values[longest_2][[1]])
+  par(mfrow = c(nb_rows, nb_cols), mar = c(4.5,4.5,5,1))
+  
+  # plot parameters
+  x <- sort(as.numeric(as.character(unique(indicators[,longest_1 + 1]))))
+  min_zeros <- min(as.numeric(as.character(indicators$zeros_per_line)))
+  max_zeros <- max(as.numeric(as.character(indicators$zeros_per_line)))
+  
+  for (rowi in 1:nrow(combinations)) {
+    row <- combinations[rowi,]
+    title <- ""
+    lines <- indicators
+    
+    for (coli in 1:length(row)) {
+      value <- as.numeric(as.character(row[coli]))
+      title <- paste(title, names(row[coli]), "=", value)
+      if (coli %% 2 == 0)
+        title <- paste(title, "\n")
+      else
+        title <- paste(title, " ")
+      lines <- lines[lines[names(row[coli])] == value,]
+    }
+    
+    # values taken by the parameters
+    in_plot <- lines[,longest_1 + 1]
+    vertical <- lines[,longest_2 + 1]
+    horizontal <-  lines[,longest_3 + 1]
+    
+    # for each horizontal value
+    for (horiz in sort(unique(horizontal))) {
+      
+      # for each vertical value
+      for (vertic in sort(unique(vertical))) {
+        
+        y <- c()
+        for (this_y in sort(unique(in_plot))) {
+          # line <- line[page == p & horizontal == horiz & vertical == vertic & in_plot == this_y,]
+          line <- lines[horizontal == horiz & vertical == vertic & in_plot == this_y,]
+          if (nrow(line) != 1)
+            stop("To plot the results, each set of the parameters' values must be represented exactly 1 time")
+          
+          value <- line[,this_criteria]
+          value <- as.numeric(as.character(value))
+          y <- c(y, value)
+        }
+        
+        this_title <- paste(title,
+                       names(indicators)[longest_3 + 1], "=", horiz, "\n",
+                       names(indicators)[longest_2 + 1], "=", vertic)
+        
+        # plot this graph
+        plot(x, y, ylim = c(min_zeros, max_zeros),
+             type = "b",
+             xlab = names(indicators)[longest_1 + 1],
+             ylab = criteria,
+             main = this_title,
+             cex.main = 0.8)
+        
+      }
+      
+    }
+  
+  }
+  
+  par(mfrow = c(1,1))
+  dev.off()
+  
+}
+
+
+############
+#   main   #
+############
+
+update_list_of_file_names()
+summary <- create_summary(nb_cores, count_duplicates = count_duplicates)
+
+# settings_list <- summary[2][[1]]
+# indicators_ <- summary[1][[1]]
+indicators_ <- summary
+
+indicators <- indicators_[order(indicators_$zeros_per_line),] # sort by number of zeros per line
+# indicators_ <- indicators[order(as.numeric(row.names(indicators))),] # order back to row numbers
+
+# record the summary in a file
+indicators_to_write <- indicators[, !names(indicators) %in%
+                                    c("title", "f0to10E3", "f10E3to10E4", "f10E4to10E5",
+                                      "f10E5to10E6", "f10E6to10E7", "f10E7")]
+write.table(indicators_to_write,
+            file = summary_out,
+            quote = FALSE,
+            row.names = FALSE,
+            sep = "\t")
+
+# ions per intensity graph
+ions_per_intensity(indicators_)
+
+# plots to compare each set of parameters
+graph_results(indicators_, criteria = "zeros_per_line")
+
+# zip of pseudospectra .msp and .tsv files
+system(paste0('cd Pseudospectra ; ls . | grep -e "msp$" -e "tsv$" | zip -r -@ "peakspectra_out.zip"'))
+system(paste("cd Pseudospectra ; mv peakspectra_out.zip", peakspectra_out))
+
+# zip of count_duplicates .tsv files
+if (count_duplicates) {
+  system(paste0('cd count_duplicates ; ls . | grep "tsv$" | zip -r -@ "count_duplicates_out.zip"'))
+  system(paste("cd count_duplicates ; mv count_duplicates_out.zip", count_duplicates_out))
+}
+
diff -r 000000000000 -r 40de28c7d3fb Testtest/GCMS-test_settings.R
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Testtest/GCMS-test_settings.R	Thu Nov 23 08:50:14 2017 -0500
@@ -0,0 +1,227 @@
+# author: Pauline Ribeyre
+
+
+#####################
+# required packages #
+#####################
+
+library("metaMS") # provides "metaMSsettings" class
+
+
+####################
+# default settings #
+####################
+
+# PeakPicking
+PeakPicking.method = "matchedFilter"
+PeakPicking.step = 0.05
+PeakPicking.steps = 2
+PeakPicking.mzdiff = 0.5
+PeakPicking.fwhm = 5
+PeakPicking.snthresh = 2
+PeakPicking.max = 500
+
+# Alignment
+# ?
+
+# CAMERA
+CAMERA.perfwhm = 1
+
+# DBconstruction
+DBconstruction.minintens = 0.0
+DBconstruction.rttol = 0.1
+DBconstruction.intensityMeasure = "maxo"
+DBconstruction.DBthreshold = .80
+DBconstruction.minfeat = 5
+
+# match2DB
+match2DB.rtdiff = 0.5
+match2DB.minfeat = 5
+# match2DB.rtval = ?
+# match2DB.mzdiff = ?
+# match2DB.ppm = ?
+match2DB.simthresh = 0.8
+match2DB.timeComparison = "rt"
+match2DB.RIdiff = 5
+
+# betweenSamples
+betweenSamples.min.class.fraction = 0.8 # pools
+# betweenSamples.min.class.fraction = 0.5 # samples
+betweenSamples.min.class.size = 5
+betweenSamples.timeComparison = "rt"
+betweenSamples.rtdiff = 0.5
+betweenSamples.RIdiff = 2 # default setting
+# betweenSamples.RIdiff = 0.05 # w4m setting
+betweenSamples.simthresh = 0.8
+
+# matchIrrelevants
+# matchIrrelevants.irrelevantClasses = c("Bleeding", "Plasticizers")
+# matchIrrelevants.RIdiff = ?
+# matchIrrelevants.rtdiff = ?
+# matchIrrelevants.simthresh = ?
+# matchIrrelevants.timeComparison = "RI"
+
+# TODO RI option
+
+
+###################
+# settings object #
+###################
+
+# settings <- metaMSsettings("protocolName" = "settings", "chrom" = "GC")
+
+settings <- metaMSsettings("protocolName" = "settings",
+                            "chrom" = "GC",
+                           
+                            PeakPicking = list(
+                              method = PeakPicking.method,
+                              step = PeakPicking.step,
+                              steps = PeakPicking.steps,
+                              mzdiff = PeakPicking.mzdiff,
+                              fwhm = PeakPicking.fwhm,
+                              snthresh = PeakPicking.snthresh,
+                              max = PeakPicking.max),
+                           
+                            CAMERA = list(perfwhm = CAMERA.perfwhm))
+
+metaSetting(settings, "DBconstruction") <- list(
+  minintens = DBconstruction.minintens,
+  rttol = DBconstruction.rttol,
+  intensityMeasure = DBconstruction.intensityMeasure,
+  DBthreshold = DBconstruction.DBthreshold, 
+  minfeat = DBconstruction.minfeat)
+
+metaSetting(settings, "match2DB") <- list(
+  simthresh = match2DB.simthresh,
+  timeComparison = match2DB.timeComparison,
+  rtdiff = match2DB.rtdiff,
+  RIdiff = match2DB.RIdiff,
+  minfeat = match2DB.minfeat)
+
+metaSetting(settings, "betweenSamples") <- list(
+  min.class.fraction = betweenSamples.min.class.fraction,
+  min.class.size = betweenSamples.min.class.size,
+  timeComparison = betweenSamples.timeComparison,
+  rtdiff = betweenSamples.rtdiff,
+  RIdiff = betweenSamples.RIdiff,    
+  simthresh = betweenSamples.simthresh)
+
+# metaSetting(GALAXY.GC, "matchIrrelevants") <- list(
+#   irrelevantClasses = matchIrrelevants.irrelevantClasses,
+#   RIdiff = matchIrrelevants.RIdiff,
+#   rtdiff = matchIrrelevants.rtdiff,
+#   simthresh = matchIrrelevants.simthresh,
+#   timeComparison = matchIrrelevants.timeComparison)
+
+
+# best settings pool
+settings@betweenSamples.min.class.size = 5
+settings@PeakPicking$fwhm = 6
+settings@PeakPicking$mzdiff = -0.5
+settings@betweenSamples.min.class.fraction = 0.8
+settings@betweenSamples.simthresh = 0.7
+settings@betweenSamples.rtdiff = 0.05
+
+
+# best settings samples
+settings@betweenSamples.min.class.size = 5
+settings@PeakPicking$fwhm = 6
+settings@PeakPicking$mzdiff = 0.5
+settings@betweenSamples.min.class.fraction = 0.5
+settings@betweenSamples.simthresh = 0.7
+settings@betweenSamples.rtdiff = 0.05
+
+
+#################
+# vary settings #
+#################
+
+pop <- function(list) {
+  # Removes the last item from a list.
+  #
+  # Args:
+  #   list: the list to pop.
+  #
+  # Returns:
+  #   The last item of the list before it was removed.
+  
+  val <- list[length(list)]
+  assign(as.character(substitute(list)), list[-length(list)], parent.frame())
+  return (val)
+  
+}
+
+
+vary_list_to_settings <- function(settings, vary_list, title = "", titles_to_test = c(), settings_to_test = c()) {
+  # Reads the list of parameters to vary and their ranges and builds a list of file titles and settings from it.
+  #
+  # Args:
+  #   settings: default settings for runGC.
+  #   vary_list: list of parameters to vary and the values each parameter must take.
+  #   title: title of the current file (concatenation of the values taken by the varied parameters).
+  #   titles_to_test:  list of titles (one for each settings set) (concatenation of the values taken by the varied parameters).
+  #   settings_to_test: list of settings sets for runGC.
+  #
+  # Returns:
+  #   A list containing:
+  #   1: the list of file titles ;
+  #   2: the list of runGC() settings.
+  
+  if (length(vary_list) == 0) { # if there are no parameters to vary
+    
+    titles_to_test <- c(titles_to_test, "default_settings")
+    settings_to_test <- c(settings_to_test, settings)
+    
+  }
+  
+  else {
+    
+    param <- pop(vary_list)[[1]]
+    
+    title <- paste0(title, param[1])
+    setting <- param[2]
+    range <- as.numeric(param[3:length(param)])
+    
+    # for each possible value of the parameter
+    for (p in range) {
+      
+      # change the settings
+      isSlot <- regexpr("\\$", setting)[1]
+      
+      if (isSlot == -1) { # the parameter is a slot in class metaMSsettings
+        slot(settings, setting) <- p
+      }
+      else { # the parameter is not a slot in class metaMSsettings
+        slot(settings, substr(setting, 1, isSlot - 1))[substr(setting, isSlot + 1, nchar(setting))] <- p
+      }
+      
+      new_title <- paste0(title, "=", p, "_")
+      
+      # save the settings in a list
+      if (length(vary_list) == 0) {
+        titles_to_test <- c(titles_to_test, new_title)
+        settings_to_test <- c(settings_to_test, settings)
+      }
+      else {
+        res <- vary_list_to_settings(settings, vary_list, new_title, titles_to_test, settings_to_test)
+        titles_to_test <- res[[1]]
+        settings_to_test <- res[[2]]
+      }
+      
+    } # end for
+    
+  } # end else
+  
+  return (list(titles_to_test, settings_to_test))
+  
+}
+
+
+############
+#   main   #
+############
+
+# settings to vary and ranges of values
+res <- vary_list_to_settings(settings, vary_list)
+titles_to_test <- res[[1]]
+settings_to_test <- res[[2]]
diff -r 000000000000 -r 40de28c7d3fb Testtest/GCMS-test_spectrum.R
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Testtest/GCMS-test_spectrum.R	Thu Nov 23 08:50:14 2017 -0500
@@ -0,0 +1,135 @@
+# author: Pauline Ribeyre
+
+
+#############
+# find ions #
+#############
+
+is_ion_present <- function(GC_results, rt, mz_list) {
+  # Looks for a ion in the pseudospectra.
+  #
+  # Args:
+  #   GC_results: result of a test by runGC().
+  #   rt: retention time of the ion to look for.
+  #   mz_list: list of the ion's isotopes' m/z.
+  #
+  # Returns:
+  #   The number of times the ion was found in the pseudospectra.
+  
+  delta_rt <- ions_delta_rt
+  delta_mz <- ions_delta_mz
+  
+  res = 0
+  
+  pseudospectra <- GC_results$PseudoSpectra
+  for (peak in pseudospectra) { # check all the peaks
+    if (peak$rt >= rt - delta_rt && peak$rt <= rt + delta_rt) { # check if the retention time matches
+      
+      ps <- peak$pspectrum
+      found_ion <- rep(NA, length(mz_list))
+      
+      for (i in 1:length(mz_list)) { # check the presence of all the ions given in parameter
+        
+        ion <- mz_list[i][[1]]
+        found_ion[i] <- sum((ps[,"mz"] >= ion - delta_mz) & (ps[,"mz"] <= ion + delta_mz))
+        
+      } # end for
+      
+      if (length(which(found_ion > 1)) > 0) {
+        stop("The ion was found more than 1 time in the same pseudospectrum.")
+      }
+      
+      if (sum(found_ion) == length(mz_list)) {
+        res <- res + 1
+      }
+      
+    } # end if
+  } # end for
+  
+  return (res)
+  
+}
+
+
+##############
+# duplicates #
+##############
+
+count_duplicates_function <- function(GC_results) {
+  # Counts the number of ions that have been found in more than one pseudospectra (slow).
+  #
+  # Args:
+  #   GC_results: result of a test by runGC().
+  #
+  # Returns:
+  #   The number of duplicates.
+  
+  delta_rt <- duplicates_delta_rt
+  delta_mz <- duplicates_delta_mz
+  
+  pseudospectra <- GC_results$PseudoSpectra
+  
+  # order by pseudospectrum rt
+  rts <- sapply(pseudospectra, function(x) x$rt)
+  rts_order <- order(rts)
+  
+  duplicates <- NULL      # record all the duplicates we find
+  already_counted <- NULL # record which duplicates have already been counted
+  
+  for (pspec in 1:length(rts)) { # for each pseudospectrum
+    
+    i <- rts_order[pspec]
+    this_rt <- rts[i]
+    
+    # number of following pseudospectra to consider
+    k <- 0
+    while (pspec + k + 1 <  length(rts_order) && rts[rts_order[pspec + k + 1]] < this_rt + delta_rt) { # check if the retention time matches
+      k <- k + 1
+    }
+    
+    mzs <- lapply(pseudospectra[rts_order[pspec:(pspec + k)]], function(x) x$pspectrum)
+    mzs_sorted <- NULL
+    for (j in 1:(k + 1)) {
+      mzs_sorted <- rbind(mzs_sorted, t(sapply(1:length(mzs[[j]][,1]), function(n) c(rts[rts_order[pspec + j - 1]], mzs[[j]][,1][n]))))
+    }
+    mzs_sorted <- mzs_sorted[order(mzs_sorted[,2]),] # column 1: rt ; column 2: m/z
+    
+    i <- 1
+    while (i <= nrow(mzs_sorted)) { # for each m/z
+      
+      j <- 1
+      this_already_counted <- nrow(merge(data.frame(t(mzs_sorted[i,])), data.frame(already_counted))) > 0
+      
+      if (is.null(already_counted) || !this_already_counted) {
+        this_mz <- mzs_sorted[i,2]
+        already_counted <- rbind(already_counted, mzs_sorted[i,])
+        same_ion <- 1 # count how many times this m/z appears
+        
+        while (j <= nrow(mzs_sorted) - i && mzs_sorted[i+j,2] >= this_mz - delta_mz && mzs_sorted[i+j,2] <= this_mz + delta_mz) { # check if the m/z matches
+          this_already_counted <- nrow(merge(data.frame(t(mzs_sorted[i+j,])), data.frame(already_counted))) > 0
+          
+          if (!this_already_counted) { # do not count a duplicate more than once
+            same_ion <- same_ion + 1
+            already_counted <- rbind(already_counted, mzs_sorted[i+j,]) # record which duplicates have already been counted
+            this_mz <- mzs_sorted[i+j,2]
+          }
+          
+          j <- j + 1
+        }
+        
+        # first/min m/z, last/max m/z, rt where we started, number of duplicates
+        if (same_ion > 1)
+          duplicates <- rbind(duplicates, c(mzs_sorted[i,2], this_mz, mzs_sorted[i,1], same_ion))
+      }
+      i <- i + j
+    }
+    
+  }
+  
+  if (is.null(duplicates))
+    duplicates <- c(0, 0, 0, 0)
+  
+  return (duplicates)
+  
+}
+