changeset 1:53f3f87ece44 draft

Deleted selected files
author melpetera
date Thu, 23 Nov 2017 09:35:25 -0500
parents 40de28c7d3fb
children 0b79cac1bbaa
files Testtest/GCMS-test.R Testtest/GCMS-test.xml Testtest/GCMS-test_analyze.R Testtest/GCMS-test_settings.R Testtest/GCMS-test_spectrum.R
diffstat 5 files changed, 0 insertions(+), 994 deletions(-) [+]
line wrap: on
line diff
--- a/Testtest/GCMS-test.R	Thu Nov 23 08:50:14 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,220 +0,0 @@
-# author: Pauline Ribeyre
-
-
-#####################
-# required packages #
-#####################
-
-library(batch) # provides "parseCommandargv" function
-
-
-############
-#   init   #
-############
-
-# read the parameters
-argv_list <- parseCommandArgs(evaluate = FALSE) #, trailingOnly = TRUE)
-argv <- unlist(argv_list)
-
-# common environment to share variables between files
-env <- new.env()
-
-# timestamp made unique by adding a random element
-env$timestamp <- round(as.numeric(Sys.time()) + runif(1, 0, 100))
-
-# set working directory
-tmpwd <- paste0("/galaxy/data/", env$timestamp)
-# system("chmod -R 0777 .")
-# dir.create(tmpwd, mode = "0777")
-dir.create(tmpwd)
-setwd(tmpwd)
-
-# readme file
-readme_msg <- paste0("\"", env$timestamp, "\"",
-                     " is a temporary folder storing data for an instance of Galaxy tool \"GCMS-test\" launched on ",
-                    Sys.time(), ". ",
-                    "This folder should be deleted automatically at the end of the tool's execution. ",
-                    "If the instance ended or has been aborted and this folder still exists, it can be deleted manually.")
-write(readme_msg, file = "README.txt")
-
-################
-# source files #
-################
-
-source_local <- function(fname){
-  # Gets a file's full name in Galaxy's directories.
-  #
-  # Args:
-  #   fname: the name of the file.
-  #
-  # Returns:
-  #   The full name of the file.
-
-  argv <- commandArgs(trailingOnly = FALSE)
-  base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
-  return (paste(base_dir, fname, sep="/"))
-
-}
-
-env$source_settings <- source_local("GCMS-test_settings.R")
-env$source_analyze <- source_local("GCMS-test_analyze.R")
-env$source_output <- source_local("GCMS-test_output.R")
-env$source_spectrum <- source_local("GCMS-test_spectrum.R")
-
-env$source_file_names <- "file_names.txt"
-
-
-#############
-# functions #
-#############
-
-args_to_vary_list <- function() {
-  # Creates the list of settings variations using the arguments.
-  #
-  # Returns:
-  #   The list of settings variations.
-
-  param <- c("step", "steps", "mzdiff", "fwhm", "simthresh", "snthresh", "max",
-             "minclassfraction", "minclasssize", "rtdiff") #, "minfeat")
-
-  PeakPicking_param <- c("step", "steps", "mzdiff", "fwhm", "snthresh", "max")
-  betweenSamples_param <- c("min.class.fraction", "min.class.size", "rtdiff", "simthresh")
-
-  vary <- list()
-
-  for (p in param) {
-
-    min <- as.numeric(argv[[paste0(p, "_min")]])
-    max <- as.numeric(argv[[paste0(p, "_max")]])
-    step <- as.numeric(argv[[paste0(p, "_step")]])
-
-    if (p == "minclassfraction")
-      p <- "min.class.fraction"
-    else if (p == "minclasssize")
-      p <- "min.class.size"
-
-    if (p %in% PeakPicking_param)
-      name <- paste0("PeakPicking$", p)
-    else if (p %in% betweenSamples_param)
-      name <- paste0("betweenSamples.", p)
-    else
-      stop("\"", p, "\" is not a valid parameter to vary.")
-
-    # prevent seq error
-    if (min > max)
-      max <- min
-
-    range <- seq(min, max, step)
-    vary[[length(vary) + 1]] <- c(p, name, range)
-
-  }
-
-  return (vary)
-
-}
-
-
-read_ions_to_check <- function() {
-  # Reads the arguments concerning the ions to check.
-  
-  env$ions_name <- list()
-  env$ions_rt <- list()
-  env$ions_mzs <- list()
-  
-  env$nb_ions_to_check <- length(grep("ions_name_", names(argv)))
-  
-  if (env$nb_ions_to_check > 0) {
-    for (i in 1:env$nb_ions_to_check) {
-      env$ions_name[length(env$ions_name) + 1] <- argv[[paste0("ions_name_", i)]]
-      env$ions_rt[length(env$ions_rt) + 1] <- as.numeric(argv[[paste0("ions_rt_", i)]])
-      mzs <- argv[[paste0("ions_mzs_", i)]]
-      mzs <- as.numeric(strsplit(mzs, ",")[[1]])
-      env$ions_mzs[length(env$ions_mzs) + 1] <- list(mzs)
-    }
-  }
-  
-}
-
-
-############
-#  inputs  #
-############
-
-env$nb_cores <- as.numeric(argv[["nb_cores"]])
-cat("Nb cores:", env$nb_cores, "\n")
-
-# get data from zip file
-if (!is.null(argv[["zip_file"]])) {
-  env$cdf_files <- unzip(argv[["zip_file"]])
-} else {
-  stop("No files have been provided.")
-}
-
-# vary parameters
-env$vary_list <- args_to_vary_list()
-
-# count duplicate ions
-env$count_duplicates <- argv[["count_duplicates"]] == "true"
-if (env$count_duplicates) {
-  
-  # delta rt
-  env$duplicates_delta_rt <- as.numeric(argv[["duplicates_delta_rt"]])
-  
-  # delta mz
-  env$duplicates_delta_mz <- as.numeric(argv[["duplicates_delta_mz"]])
-  
-}
-
-# check the presence of ions
-env$nb_ions_to_check <- 0
-env$check_ions <- argv[["check_ions"]] == "true"
-if (env$check_ions) {
-  
-  read_ions_to_check()
-  
-  # delta rt
-  env$ions_delta_rt <- as.numeric(argv[["ions_delta_rt"]])
-  
-  # delta mz
-  env$ions_delta_mz <- as.numeric(argv[["ions_delta_mz"]])
-  
-}
-
-
-#############
-#  outputs  #
-#############
-
-env$summary_out <- argv[["summary_out"]]
-env$peakspectra_out <- argv[["peakspectra_out"]]
-env$intensity_graph_out <- argv[["intensity_graph_out"]]
-
-if (env$count_duplicates)
-  env$count_duplicates_out <- argv[["duplicates_out"]]
-
-env$compare_graph_out <- argv[["compare_graph_out"]]
-
-
-##########
-#  run   #
-##########
-
-cat("---------- Settings ----------\n")
-sys.source(env$source_settings, env)
-
-
-cat("---------- Analysis ----------\n")
-sys.source(env$source_analyze, env)
-
-
-cat("----------  Output  ----------\n")
-sys.source(env$source_output, env)
-
-
-##########
-#  end   #
-##########
-
-# delete temporary files
-setwd("..")
-unlink(tmpwd, recursive = TRUE)
--- a/Testtest/GCMS-test.xml	Thu Nov 23 08:50:14 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,263 +0,0 @@
-<tool id="GCMSt" name="GCMS-test" version="17.11">
-	<description>Test parameters for GC-MS data preprocessing using metaMS package</description>
-    
-    <requirements>
-        <requirement type="package" version="1.8.0">bioconductor-metams</requirement>
-        <requirement type="package" version="1.46.0">bioconductor-xcms</requirement>
-        <requirement type="package" version="1.1_4">r-batch</requirement>
-    </requirements>
-    
-    <stdio>
-        <exit_code range="1:" level="fatal" />
-    </stdio>
-
-	<command>
-		Rscript $__tool_directory__/GCMS-test.R
-		
-		<!-- Inputs -->
-		
-		<!-- nb_cores \${GALAXY_SLOTS:-1} -->
-		nb_cores \${GALAXY_SLOTS}
-		
-		zip_file $zip_file
-		
-		step_min $step.step_min
-		step_max $step.step_max
-		step_step $step.step_step
-		
-		steps_min $steps.steps_min
-		steps_max $steps.steps_max
-		steps_step $steps.steps_step
-		
-		mzdiff_min $mzdiff.mzdiff_min
-		mzdiff_max $mzdiff.mzdiff_max
-		mzdiff_step $mzdiff.mzdiff_step
-		
-		fwhm_min $fwhm.fwhm_min
-		fwhm_max $fwhm.fwhm_max
-		fwhm_step $fwhm.fwhm_step
-		
-		simthresh_min $simthresh.simthresh_min
-		simthresh_max $simthresh.simthresh_max
-		simthresh_step $simthresh.simthresh_step
-		
-		snthresh_min $snthresh.snthresh_min
-		snthresh_max $snthresh.snthresh_max
-		snthresh_step $snthresh.snthresh_step
-		
-		max_min $max.max_min
-		max_max $max.max_max
-		max_step $max.max_step
-		
-		minclassfraction_min $minclassfraction.minclassfraction_min
-		minclassfraction_max $minclassfraction.minclassfraction_max
-		minclassfraction_step $minclassfraction.minclassfraction_step
-		
-		minclasssize_min $minclasssize.minclasssize_min
-		minclasssize_max $minclasssize.minclasssize_max
-		minclasssize_step $minclasssize.minclasssize_step
-		
-		rtdiff_min $rtdiff.rtdiff_min
-		rtdiff_max $rtdiff.rtdiff_max
-		rtdiff_step $rtdiff.rtdiff_step
-		
-		<!--minfeat_min $minfeat.minfeat_min
-		minfeat_max $minfeat.minfeat_max
-		minfeat_step $minfeat.minfeat_step-->
-		
-		count_duplicates $outputs_options.count_duplicates.count_duplicates_bool
-		#if $outputs_options.count_duplicates.count_duplicates_bool:
-			duplicates_delta_rt $outputs_options.count_duplicates.duplicates_delta_rt
-			duplicates_delta_mz $outputs_options.count_duplicates.duplicates_delta_mz
-		#end if
-		
-		check_ions $outputs_options.check_ions.check_ions_bool
-		#if $outputs_options.check_ions.check_ions_bool:
-			ions_delta_rt $outputs_options.check_ions.ions_delta_rt
-			ions_delta_mz $outputs_options.check_ions.ions_delta_mz
-			
-			#for $i, $s in enumerate($outputs_options.check_ions.ions_to_check)
-				ions_name_${i+1} $s.name
-				ions_rt_${i+1} $s.rt
-				ions_mzs_${i+1} $s.mzs
-			#end for
-		#end if
-		
-		<!-- Outputs -->
-		
-		summary_out $summary_out
-		peakspectra_out $peakspectra_out
-		intensity_graph_out $intensity_graph_out
-		
-		#if $outputs_options.count_duplicates.count_duplicates_bool:
-			duplicates_out $duplicates_out
-		#end if
-		
-		compare_graph_out $compare_graph_out
-	</command>
-  
-	<inputs>
-		<param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file" help="Zip file containing the chromatograms to analyse" />
-		
-		<section name="fwhm" title="FWHM (default: 6)" help="The full width at half maximum of the peaks" >
-			<param name="fwhm_min" value="6" type="float" label="From" />
-			<param name="fwhm_max" value="6" type="float" label="To" />
-			<param name="fwhm_step" value="1" type="float" label="Step" />
-		</section>
-		
-		<section name="max" title="Max (default: 500)" help="Maximum number of peaks per extracted ion chromatogram" >
-			<param name="max_min" value="500" type="integer" label="From" />
-			<param name="max_max" value="500" type="integer" label="To" />
-			<param name="max_step" value="1" type="integer" label="Step" />
-		</section>
-		
-		<section name="minclassfraction" title="Minimum class fraction (default: 0.5)" help="The fraction of samples in which a pseudospectrum is present before it is regarded as an unknown" >
-			<param name="minclassfraction_min" value="0.5" type="float" label="From" />
-			<param name="minclassfraction_max" value="0.5" type="float" label="To" />
-			<param name="minclassfraction_step" value="0.5" type="float" label="Step" />
-		</section>
-		
-		<section name="minclasssize" title="Minimum class size (default: 5)" help="The absolute number of samples in which a pseudospectrum is present before it is regarded as an unknown" >
-			<param name="minclasssize_min" value="5" type="integer" label="From" />
-			<param name="minclasssize_max" value="5" type="integer" label="To" />
-			<param name="minclasssize_step" value="1" type="integer" label="Step" />
-		</section>
-		
-		<!-- <section name="minfeat" title="minfeat (default: 0.05)" help="The minimum number of ion in a mass spectra to consider it a molecule" >
-			<param name="minfeat_min" value="0.05" type="float" label="From" />
-			<param name="minfeat_max" value="0.05" type="float" label="To" />
-			<param name="minfeat_step" value="0.01" type="float" label="Step" />
-		</section> -->
-		
-		<section name="mzdiff" title="m/z diff (default: 0.05)" help="Minimum difference in m/z for peaks with overlapping retention times" >
-			<param name="mzdiff_min" value="0.05" type="float" label="From" />
-			<param name="mzdiff_max" value="0.05" type="float" label="To" />
-			<param name="mzdiff_step" value="0.01" type="float" label="Step" />
-		</section>
-		
-		<section name="rtdiff" title="RT diff (default: 0.05)" help="The allowed RT shift between same molecules in different samples" >
-			<param name="rtdiff_min" value="0.05" type="float" label="From" />
-			<param name="rtdiff_max" value="0.05" type="float" label="To" />
-			<param name="rtdiff_step" value="0.01" type="float" label="Step" />
-		</section>
-		
-		<section name="simthresh" title="Similarity threshold (default: 0.7)" help="The minimum similarity allowed between peaks' mass spectra to be considered as equal" >
-			<param name="simthresh_min" value="0.7" type="float" label="From" />
-			<param name="simthresh_max" value="0.7" type="float" label="To" />
-			<param name="simthresh_step" value="0.1" type="float" label="Step" />
-		</section>
-		
-		<section name="snthresh" title="s/n threshold (default: 2)" help="Signal to noise ratio cutoff" >
-			<param name="snthresh_min" value="2" type="integer" label="From" />
-			<param name="snthresh_max" value="2" type="integer" label="To" />
-			<param name="snthresh_step" value="1" type="integer" label="Step" />
-		</section>
-		
-		<section name="step" title="Step (default: 0.05)" help="Step size to use for profile generation" >
-			<param name="step_min" value="0.05" type="float" label="From" />
-			<param name="step_max" value="0.05" type="float" label="To" />
-			<param name="step_step" value="0.01" type="float" label="Step" />
-		</section>
-		
-		<section name="steps" title="Steps (default: 2)" help="Number of steps to merge prior to filtration" >
-			<param name="steps_min" value="2" type="integer" label="From" />
-			<param name="steps_max" value="2" type="integer" label="To" />
-			<param name="steps_step" value="1" type="integer" label="Step" />
-		</section>
-		
-		<section name="outputs_options" title="Output options" expanded="True" >
-			<conditional name="count_duplicates" >
-				<param name="count_duplicates_bool" type="boolean" checked="false" label="Count the duplicates?" help="Counting the number of duplicates for each test slows the process down" />
-				<when value="true" >
-					<param name="duplicates_delta_rt" value="0.1" type="float" label="Delta rt" help="Maximum difference in two ions' m/z to consider them as equal" />
-					<param name="duplicates_delta_mz" value="0.1" type="float" label="Delta m/z" help="Maximum difference in two ions' rt to consider them as equal" />
-				</when>
-				<when value="false" />
-			</conditional>
-			
-			<conditional name="check_ions" >
-				<param name="check_ions_bool" type="boolean" checked="false" label="Check the presence of ions?" help="Checking the presence of ions for each test slows the process down" />
-				<when value="true" >
-					<param name="ions_delta_rt" value="0.1" type="float" label="Delta rt" help="Maximum difference in two ions' m/z to consider them as equal" />
-					<param name="ions_delta_mz" value="0.5" type="float" label="Delta m/z" help="Maximum difference in two ions' rt to consider them as equal" />
-					<repeat name="ions_to_check" title="Check ion" min="1" help="Add as many ions as necessary" >
-						<param name="name" value="Valine" type="text" label="Name" help="Name to be displayed. Ex: Valine" />
-						<param name="rt" value="9.67" type="float" label="Retention time" help="The ion's retention time. Ex: 9.67" />
-						<param name="mzs" value="218.1105,219.1146" type="text" label="Mass-to-charge ratio for each isotope" help="List of the isotopes' m/zs separated by commas, without spaces. Ex: 218.1105,219.1146"/>
-					</repeat>
-				</when>
-				<when value="false" />
-			</conditional>
-		</section>
-	</inputs>
-  
-	<outputs>
-		<data name="summary_out" label="summary.tsv" format="tabular" />
-		<data name="peakspectra_out" label="peakspectra.zip" format="zip" />
-		<data name="intensity_graph_out" label="intensity_graph.pdf" format="pdf" />
-		<data name="duplicates_out" label="duplicates.zip" format="zip" >
-			<filter>outputs_options['count_duplicates']['count_duplicates_bool']</filter>
-		</data>
-		<data name="compare_graph_out" label="compare_graph.pdf" format="pdf" />
-    </outputs>
-	
-	<help>
-
-**Author** Pauline Ribeyre
-
-=========
-GCMS-test
-=========
-
------------
-Description
------------
-
-This tool allows the user to run the method metaMS.runGC() with several sets of parameters and to compare the results of each run.
-
------------------
-Workflow position
------------------
-
-**Upstream tools**
-
-You must start from here.
-
-**Downstream tools**
-
-+---------------------------+---------------------------------------+--------+
-| Name                      | Output file                           | Format |
-+===========================+=======================================+========+
-|                           |                                       |        |
-+---------------------------+---------------------------------------+--------+
-
-----------
-Parameters
-----------
-
-The **zip file** contains the data (the chromatograms to analyse with metaMS.runGC).
-
-Each section allows the user to vary one parameter by choosing a start value, a stop value and a step for this parameter.
-
-The last section allows the user to set output preferences.
-
--------
-Outputs
--------
-
-The output file **summary.tsv** is a tabular file. It contains the summary of the tests.
-
-The output file **peakspectra.zip** is a zip file. It contains the .msp and .tsv peakspectra files for each test.
-
-The output file **intensity_graph.zip** is a pdf file. It contains the "number of ions vs intensity range" graph for each test.
-
-The output file **duplicates.zip** is a zip file. It contains the .tsv duplicates file for each test.
-
----------
-Changelog
----------
-
-**Version 1.0 - 21/07/2017**
-
-	</help>
-</tool>
\ No newline at end of file
--- a/Testtest/GCMS-test_analyze.R	Thu Nov 23 08:50:14 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,149 +0,0 @@
-# author: Pauline Ribeyre
-
-
-#####################
-# required packages #
-#####################
-
-library("metaMS") # provides "runGC" function
-
-
-############
-# analysis #
-############
-
-peakspectra_table <- function(GC_results, file_title) {
-  # Saves the pseudospectra in 2 files (text and tabular formats).
-  
-  names <- c()
-  rts <- c()
-  rt.sds <- c()
-  mzs_df <- list()
-  for (ps in GC_results$PseudoSpectra) {
-    
-    name <- ps$Name
-    rt <- ps$rt
-    rt.sd <- ps$rt.sd
-    
-    names <- c(names, name)
-    rts <- c(rts, rt)
-    rt.sds <- c(rt.sds, rt.sd)
-    
-    spectrum <- data.frame(ps$pspectrum)
-    mz <- c()
-    maxo <- c()
-    for (i in 1:nrow(spectrum)) {
-      ion <- spectrum[i,]
-      mz <- c(mz, ion$mz)
-      maxo <- c(maxo, ion$maxo)
-    }
-    df <- data.frame(name, rt, rt.sd, mz, maxo)
-    mzs_df[[length(mzs_df) + 1]] <- df
-    
-  }
-  
-  df = do.call(rbind, mzs_df)
-  
-  write.table(df,
-              file = file_title,
-              quote = FALSE,
-              row.names = FALSE,
-              sep = "\t")
-  
-}
-
-
-my_runGC <- function(n, cdf_files, titles_to_test, settings_to_test) {
-  # Runs the data analysis and records the results.
-  #
-  # Args:
-  #   n: index of the current test, to select the corresponding title and settings set.
-  #   cdf_files: list of the data files' names.
-  #   titles_to_test:  list of titles (one for each settings set) (concatenation of the values taken by the varied parameters).
-  #   settings_to_test: list of settings sets for runGC.
-  
-  library("metaMS")
-  
-  settings <- settings_to_test[n][[1]]
-  title <- titles_to_test[n]
-  
-  print(title)
-  
-  if (!file.exists(paste0("Peak_tables/", title, ".tsv"))) {
-
-    # run
-    GC_results <- runGC(files = cdf_files, settings = settings, returnXset = TRUE, nSlaves = 20)
-
-    # order the result table by retention time
-    peak_table <- GC_results$PeakTable <- GC_results$PeakTable[order(GC_results$PeakTable[,"rt"]),]
-    peak_table_values <- peak_table[,5:(5 + length(cdf_files) - 1)]
-    peak_table$nb_zeros <- apply(peak_table_values, 1, function(x) sum(x == 0))
-    zeros_per_l <- sum(peak_table$nb_zeros)/nrow(peak_table)
-
-    # record the table in a file
-    file_title <- paste0("Peak_tables/", title, ".tsv")
-    write.table(peak_table, file = file_title, sep = "\t", row.names = FALSE) # /!\ title length -> cannot open connexion
-    
-    # record the RData
-    file_title <- paste0("RDatas/", title, ".RData")
-    # save(GC_results, settings, file = file_title)
-    save(GC_results, file = file_title)
-    
-    # record the pseudospectra in files (.msp and .tsv)
-    file_title <- paste0("Pseudospectra/", title, ".msp")
-    write.msp(GC_results$PseudoSpectra, file = file_title, newFile = TRUE)
-    file_title <- paste0("Pseudospectra/", title, ".tsv")
-    peakspectra_table(GC_results, file_title)
-
-    cat(paste(zeros_per_l, "zeros per line.\n\n"))
-
-  } # end if
-  
-}
-
-
-runGC_vary_parameters_parallel <- function(nb_cores, cdf_files, settings, vary) {
-  # Calculates the number of sets of paramaters and runs the analysis on several cores.
-  #
-  # Args:
-  #   nb_cores: maximum number of cores to use.
-  #   cdf_files: list of the data files' names.
-  #   settings: default settings for runGC().
-  #   vary: list of parameters to vary and the values each parameter must take.
-  
-  # calculate the number of possibilities with the parameters' ranges
-  nb_possibilites <- 1
-  for (param in vary) {
-    range_param <- param[3:length(param)]
-    nb_possibilites <- nb_possibilites * length(range_param)
-  }
-  cat("Settings variations:", nb_possibilites, "combinations.\n")
-  
-  dir.create("RDatas", showWarnings = FALSE) #create the folder where the RDatas will be saved
-  dir.create("Peak_tables", showWarnings = FALSE) #create the folder where the peak tables will be saved
-  # dir.create("Ions_per_intensity", showWarnings = FALSE) #create the folder where the nb of ions per intensity range will be saved
-  dir.create("Pseudospectra", showWarnings = FALSE) #create the folder where the pseudospectra will be saved
-
-  time.start <- Sys.time() # start the timer
-
-  # run the function on several cores
-  if (length(titles_to_test) < nb_cores)
-    nb_cores <- length(titles_to_test)
-  cluster <- makeCluster(nb_cores)#, outfile = "")
-  parLapplyLB(cluster, 1:length(titles_to_test), my_runGC, cdf_files, titles_to_test, settings_to_test)
-  print(titles_to_test)
-  stopCluster(cluster)
-
-  time.end <- Sys.time() # stop the timer
-  Tdiff <- difftime(time.end, time.start)
-  print(Tdiff)
-  
-}
-
-
-############
-#   main   #
-############
-
-runGC_vary_parameters_parallel(nb_cores, cdf_files, settings, vary_list)
-
--- a/Testtest/GCMS-test_settings.R	Thu Nov 23 08:50:14 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,227 +0,0 @@
-# author: Pauline Ribeyre
-
-
-#####################
-# required packages #
-#####################
-
-library("metaMS") # provides "metaMSsettings" class
-
-
-####################
-# default settings #
-####################
-
-# PeakPicking
-PeakPicking.method = "matchedFilter"
-PeakPicking.step = 0.05
-PeakPicking.steps = 2
-PeakPicking.mzdiff = 0.5
-PeakPicking.fwhm = 5
-PeakPicking.snthresh = 2
-PeakPicking.max = 500
-
-# Alignment
-# ?
-
-# CAMERA
-CAMERA.perfwhm = 1
-
-# DBconstruction
-DBconstruction.minintens = 0.0
-DBconstruction.rttol = 0.1
-DBconstruction.intensityMeasure = "maxo"
-DBconstruction.DBthreshold = .80
-DBconstruction.minfeat = 5
-
-# match2DB
-match2DB.rtdiff = 0.5
-match2DB.minfeat = 5
-# match2DB.rtval = ?
-# match2DB.mzdiff = ?
-# match2DB.ppm = ?
-match2DB.simthresh = 0.8
-match2DB.timeComparison = "rt"
-match2DB.RIdiff = 5
-
-# betweenSamples
-betweenSamples.min.class.fraction = 0.8 # pools
-# betweenSamples.min.class.fraction = 0.5 # samples
-betweenSamples.min.class.size = 5
-betweenSamples.timeComparison = "rt"
-betweenSamples.rtdiff = 0.5
-betweenSamples.RIdiff = 2 # default setting
-# betweenSamples.RIdiff = 0.05 # w4m setting
-betweenSamples.simthresh = 0.8
-
-# matchIrrelevants
-# matchIrrelevants.irrelevantClasses = c("Bleeding", "Plasticizers")
-# matchIrrelevants.RIdiff = ?
-# matchIrrelevants.rtdiff = ?
-# matchIrrelevants.simthresh = ?
-# matchIrrelevants.timeComparison = "RI"
-
-# TODO RI option
-
-
-###################
-# settings object #
-###################
-
-# settings <- metaMSsettings("protocolName" = "settings", "chrom" = "GC")
-
-settings <- metaMSsettings("protocolName" = "settings",
-                            "chrom" = "GC",
-                           
-                            PeakPicking = list(
-                              method = PeakPicking.method,
-                              step = PeakPicking.step,
-                              steps = PeakPicking.steps,
-                              mzdiff = PeakPicking.mzdiff,
-                              fwhm = PeakPicking.fwhm,
-                              snthresh = PeakPicking.snthresh,
-                              max = PeakPicking.max),
-                           
-                            CAMERA = list(perfwhm = CAMERA.perfwhm))
-
-metaSetting(settings, "DBconstruction") <- list(
-  minintens = DBconstruction.minintens,
-  rttol = DBconstruction.rttol,
-  intensityMeasure = DBconstruction.intensityMeasure,
-  DBthreshold = DBconstruction.DBthreshold, 
-  minfeat = DBconstruction.minfeat)
-
-metaSetting(settings, "match2DB") <- list(
-  simthresh = match2DB.simthresh,
-  timeComparison = match2DB.timeComparison,
-  rtdiff = match2DB.rtdiff,
-  RIdiff = match2DB.RIdiff,
-  minfeat = match2DB.minfeat)
-
-metaSetting(settings, "betweenSamples") <- list(
-  min.class.fraction = betweenSamples.min.class.fraction,
-  min.class.size = betweenSamples.min.class.size,
-  timeComparison = betweenSamples.timeComparison,
-  rtdiff = betweenSamples.rtdiff,
-  RIdiff = betweenSamples.RIdiff,    
-  simthresh = betweenSamples.simthresh)
-
-# metaSetting(GALAXY.GC, "matchIrrelevants") <- list(
-#   irrelevantClasses = matchIrrelevants.irrelevantClasses,
-#   RIdiff = matchIrrelevants.RIdiff,
-#   rtdiff = matchIrrelevants.rtdiff,
-#   simthresh = matchIrrelevants.simthresh,
-#   timeComparison = matchIrrelevants.timeComparison)
-
-
-# best settings pool
-settings@betweenSamples.min.class.size = 5
-settings@PeakPicking$fwhm = 6
-settings@PeakPicking$mzdiff = -0.5
-settings@betweenSamples.min.class.fraction = 0.8
-settings@betweenSamples.simthresh = 0.7
-settings@betweenSamples.rtdiff = 0.05
-
-
-# best settings samples
-settings@betweenSamples.min.class.size = 5
-settings@PeakPicking$fwhm = 6
-settings@PeakPicking$mzdiff = 0.5
-settings@betweenSamples.min.class.fraction = 0.5
-settings@betweenSamples.simthresh = 0.7
-settings@betweenSamples.rtdiff = 0.05
-
-
-#################
-# vary settings #
-#################
-
-pop <- function(list) {
-  # Removes the last item from a list.
-  #
-  # Args:
-  #   list: the list to pop.
-  #
-  # Returns:
-  #   The last item of the list before it was removed.
-  
-  val <- list[length(list)]
-  assign(as.character(substitute(list)), list[-length(list)], parent.frame())
-  return (val)
-  
-}
-
-
-vary_list_to_settings <- function(settings, vary_list, title = "", titles_to_test = c(), settings_to_test = c()) {
-  # Reads the list of parameters to vary and their ranges and builds a list of file titles and settings from it.
-  #
-  # Args:
-  #   settings: default settings for runGC.
-  #   vary_list: list of parameters to vary and the values each parameter must take.
-  #   title: title of the current file (concatenation of the values taken by the varied parameters).
-  #   titles_to_test:  list of titles (one for each settings set) (concatenation of the values taken by the varied parameters).
-  #   settings_to_test: list of settings sets for runGC.
-  #
-  # Returns:
-  #   A list containing:
-  #   1: the list of file titles ;
-  #   2: the list of runGC() settings.
-  
-  if (length(vary_list) == 0) { # if there are no parameters to vary
-    
-    titles_to_test <- c(titles_to_test, "default_settings")
-    settings_to_test <- c(settings_to_test, settings)
-    
-  }
-  
-  else {
-    
-    param <- pop(vary_list)[[1]]
-    
-    title <- paste0(title, param[1])
-    setting <- param[2]
-    range <- as.numeric(param[3:length(param)])
-    
-    # for each possible value of the parameter
-    for (p in range) {
-      
-      # change the settings
-      isSlot <- regexpr("\\$", setting)[1]
-      
-      if (isSlot == -1) { # the parameter is a slot in class metaMSsettings
-        slot(settings, setting) <- p
-      }
-      else { # the parameter is not a slot in class metaMSsettings
-        slot(settings, substr(setting, 1, isSlot - 1))[substr(setting, isSlot + 1, nchar(setting))] <- p
-      }
-      
-      new_title <- paste0(title, "=", p, "_")
-      
-      # save the settings in a list
-      if (length(vary_list) == 0) {
-        titles_to_test <- c(titles_to_test, new_title)
-        settings_to_test <- c(settings_to_test, settings)
-      }
-      else {
-        res <- vary_list_to_settings(settings, vary_list, new_title, titles_to_test, settings_to_test)
-        titles_to_test <- res[[1]]
-        settings_to_test <- res[[2]]
-      }
-      
-    } # end for
-    
-  } # end else
-  
-  return (list(titles_to_test, settings_to_test))
-  
-}
-
-
-############
-#   main   #
-############
-
-# settings to vary and ranges of values
-res <- vary_list_to_settings(settings, vary_list)
-titles_to_test <- res[[1]]
-settings_to_test <- res[[2]]
--- a/Testtest/GCMS-test_spectrum.R	Thu Nov 23 08:50:14 2017 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,135 +0,0 @@
-# author: Pauline Ribeyre
-
-
-#############
-# find ions #
-#############
-
-is_ion_present <- function(GC_results, rt, mz_list) {
-  # Looks for a ion in the pseudospectra.
-  #
-  # Args:
-  #   GC_results: result of a test by runGC().
-  #   rt: retention time of the ion to look for.
-  #   mz_list: list of the ion's isotopes' m/z.
-  #
-  # Returns:
-  #   The number of times the ion was found in the pseudospectra.
-  
-  delta_rt <- ions_delta_rt
-  delta_mz <- ions_delta_mz
-  
-  res = 0
-  
-  pseudospectra <- GC_results$PseudoSpectra
-  for (peak in pseudospectra) { # check all the peaks
-    if (peak$rt >= rt - delta_rt && peak$rt <= rt + delta_rt) { # check if the retention time matches
-      
-      ps <- peak$pspectrum
-      found_ion <- rep(NA, length(mz_list))
-      
-      for (i in 1:length(mz_list)) { # check the presence of all the ions given in parameter
-        
-        ion <- mz_list[i][[1]]
-        found_ion[i] <- sum((ps[,"mz"] >= ion - delta_mz) & (ps[,"mz"] <= ion + delta_mz))
-        
-      } # end for
-      
-      if (length(which(found_ion > 1)) > 0) {
-        stop("The ion was found more than 1 time in the same pseudospectrum.")
-      }
-      
-      if (sum(found_ion) == length(mz_list)) {
-        res <- res + 1
-      }
-      
-    } # end if
-  } # end for
-  
-  return (res)
-  
-}
-
-
-##############
-# duplicates #
-##############
-
-count_duplicates_function <- function(GC_results) {
-  # Counts the number of ions that have been found in more than one pseudospectra (slow).
-  #
-  # Args:
-  #   GC_results: result of a test by runGC().
-  #
-  # Returns:
-  #   The number of duplicates.
-  
-  delta_rt <- duplicates_delta_rt
-  delta_mz <- duplicates_delta_mz
-  
-  pseudospectra <- GC_results$PseudoSpectra
-  
-  # order by pseudospectrum rt
-  rts <- sapply(pseudospectra, function(x) x$rt)
-  rts_order <- order(rts)
-  
-  duplicates <- NULL      # record all the duplicates we find
-  already_counted <- NULL # record which duplicates have already been counted
-  
-  for (pspec in 1:length(rts)) { # for each pseudospectrum
-    
-    i <- rts_order[pspec]
-    this_rt <- rts[i]
-    
-    # number of following pseudospectra to consider
-    k <- 0
-    while (pspec + k + 1 <  length(rts_order) && rts[rts_order[pspec + k + 1]] < this_rt + delta_rt) { # check if the retention time matches
-      k <- k + 1
-    }
-    
-    mzs <- lapply(pseudospectra[rts_order[pspec:(pspec + k)]], function(x) x$pspectrum)
-    mzs_sorted <- NULL
-    for (j in 1:(k + 1)) {
-      mzs_sorted <- rbind(mzs_sorted, t(sapply(1:length(mzs[[j]][,1]), function(n) c(rts[rts_order[pspec + j - 1]], mzs[[j]][,1][n]))))
-    }
-    mzs_sorted <- mzs_sorted[order(mzs_sorted[,2]),] # column 1: rt ; column 2: m/z
-    
-    i <- 1
-    while (i <= nrow(mzs_sorted)) { # for each m/z
-      
-      j <- 1
-      this_already_counted <- nrow(merge(data.frame(t(mzs_sorted[i,])), data.frame(already_counted))) > 0
-      
-      if (is.null(already_counted) || !this_already_counted) {
-        this_mz <- mzs_sorted[i,2]
-        already_counted <- rbind(already_counted, mzs_sorted[i,])
-        same_ion <- 1 # count how many times this m/z appears
-        
-        while (j <= nrow(mzs_sorted) - i && mzs_sorted[i+j,2] >= this_mz - delta_mz && mzs_sorted[i+j,2] <= this_mz + delta_mz) { # check if the m/z matches
-          this_already_counted <- nrow(merge(data.frame(t(mzs_sorted[i+j,])), data.frame(already_counted))) > 0
-          
-          if (!this_already_counted) { # do not count a duplicate more than once
-            same_ion <- same_ion + 1
-            already_counted <- rbind(already_counted, mzs_sorted[i+j,]) # record which duplicates have already been counted
-            this_mz <- mzs_sorted[i+j,2]
-          }
-          
-          j <- j + 1
-        }
-        
-        # first/min m/z, last/max m/z, rt where we started, number of duplicates
-        if (same_ion > 1)
-          duplicates <- rbind(duplicates, c(mzs_sorted[i,2], this_mz, mzs_sorted[i,1], same_ion))
-      }
-      i <- i + j
-    }
-    
-  }
-  
-  if (is.null(duplicates))
-    duplicates <- c(0, 0, 0, 0)
-  
-  return (duplicates)
-  
-}
-