Mercurial > repos > melpetera > testtest
changeset 1:53f3f87ece44 draft
Deleted selected files
author | melpetera |
---|---|
date | Thu, 23 Nov 2017 09:35:25 -0500 |
parents | 40de28c7d3fb |
children | 0b79cac1bbaa |
files | Testtest/GCMS-test.R Testtest/GCMS-test.xml Testtest/GCMS-test_analyze.R Testtest/GCMS-test_settings.R Testtest/GCMS-test_spectrum.R |
diffstat | 5 files changed, 0 insertions(+), 994 deletions(-) [+] |
line wrap: on
line diff
--- a/Testtest/GCMS-test.R Thu Nov 23 08:50:14 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,220 +0,0 @@ -# author: Pauline Ribeyre - - -##################### -# required packages # -##################### - -library(batch) # provides "parseCommandargv" function - - -############ -# init # -############ - -# read the parameters -argv_list <- parseCommandArgs(evaluate = FALSE) #, trailingOnly = TRUE) -argv <- unlist(argv_list) - -# common environment to share variables between files -env <- new.env() - -# timestamp made unique by adding a random element -env$timestamp <- round(as.numeric(Sys.time()) + runif(1, 0, 100)) - -# set working directory -tmpwd <- paste0("/galaxy/data/", env$timestamp) -# system("chmod -R 0777 .") -# dir.create(tmpwd, mode = "0777") -dir.create(tmpwd) -setwd(tmpwd) - -# readme file -readme_msg <- paste0("\"", env$timestamp, "\"", - " is a temporary folder storing data for an instance of Galaxy tool \"GCMS-test\" launched on ", - Sys.time(), ". ", - "This folder should be deleted automatically at the end of the tool's execution. ", - "If the instance ended or has been aborted and this folder still exists, it can be deleted manually.") -write(readme_msg, file = "README.txt") - -################ -# source files # -################ - -source_local <- function(fname){ - # Gets a file's full name in Galaxy's directories. - # - # Args: - # fname: the name of the file. - # - # Returns: - # The full name of the file. - - argv <- commandArgs(trailingOnly = FALSE) - base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) - return (paste(base_dir, fname, sep="/")) - -} - -env$source_settings <- source_local("GCMS-test_settings.R") -env$source_analyze <- source_local("GCMS-test_analyze.R") -env$source_output <- source_local("GCMS-test_output.R") -env$source_spectrum <- source_local("GCMS-test_spectrum.R") - -env$source_file_names <- "file_names.txt" - - -############# -# functions # -############# - -args_to_vary_list <- function() { - # Creates the list of settings variations using the arguments. - # - # Returns: - # The list of settings variations. - - param <- c("step", "steps", "mzdiff", "fwhm", "simthresh", "snthresh", "max", - "minclassfraction", "minclasssize", "rtdiff") #, "minfeat") - - PeakPicking_param <- c("step", "steps", "mzdiff", "fwhm", "snthresh", "max") - betweenSamples_param <- c("min.class.fraction", "min.class.size", "rtdiff", "simthresh") - - vary <- list() - - for (p in param) { - - min <- as.numeric(argv[[paste0(p, "_min")]]) - max <- as.numeric(argv[[paste0(p, "_max")]]) - step <- as.numeric(argv[[paste0(p, "_step")]]) - - if (p == "minclassfraction") - p <- "min.class.fraction" - else if (p == "minclasssize") - p <- "min.class.size" - - if (p %in% PeakPicking_param) - name <- paste0("PeakPicking$", p) - else if (p %in% betweenSamples_param) - name <- paste0("betweenSamples.", p) - else - stop("\"", p, "\" is not a valid parameter to vary.") - - # prevent seq error - if (min > max) - max <- min - - range <- seq(min, max, step) - vary[[length(vary) + 1]] <- c(p, name, range) - - } - - return (vary) - -} - - -read_ions_to_check <- function() { - # Reads the arguments concerning the ions to check. - - env$ions_name <- list() - env$ions_rt <- list() - env$ions_mzs <- list() - - env$nb_ions_to_check <- length(grep("ions_name_", names(argv))) - - if (env$nb_ions_to_check > 0) { - for (i in 1:env$nb_ions_to_check) { - env$ions_name[length(env$ions_name) + 1] <- argv[[paste0("ions_name_", i)]] - env$ions_rt[length(env$ions_rt) + 1] <- as.numeric(argv[[paste0("ions_rt_", i)]]) - mzs <- argv[[paste0("ions_mzs_", i)]] - mzs <- as.numeric(strsplit(mzs, ",")[[1]]) - env$ions_mzs[length(env$ions_mzs) + 1] <- list(mzs) - } - } - -} - - -############ -# inputs # -############ - -env$nb_cores <- as.numeric(argv[["nb_cores"]]) -cat("Nb cores:", env$nb_cores, "\n") - -# get data from zip file -if (!is.null(argv[["zip_file"]])) { - env$cdf_files <- unzip(argv[["zip_file"]]) -} else { - stop("No files have been provided.") -} - -# vary parameters -env$vary_list <- args_to_vary_list() - -# count duplicate ions -env$count_duplicates <- argv[["count_duplicates"]] == "true" -if (env$count_duplicates) { - - # delta rt - env$duplicates_delta_rt <- as.numeric(argv[["duplicates_delta_rt"]]) - - # delta mz - env$duplicates_delta_mz <- as.numeric(argv[["duplicates_delta_mz"]]) - -} - -# check the presence of ions -env$nb_ions_to_check <- 0 -env$check_ions <- argv[["check_ions"]] == "true" -if (env$check_ions) { - - read_ions_to_check() - - # delta rt - env$ions_delta_rt <- as.numeric(argv[["ions_delta_rt"]]) - - # delta mz - env$ions_delta_mz <- as.numeric(argv[["ions_delta_mz"]]) - -} - - -############# -# outputs # -############# - -env$summary_out <- argv[["summary_out"]] -env$peakspectra_out <- argv[["peakspectra_out"]] -env$intensity_graph_out <- argv[["intensity_graph_out"]] - -if (env$count_duplicates) - env$count_duplicates_out <- argv[["duplicates_out"]] - -env$compare_graph_out <- argv[["compare_graph_out"]] - - -########## -# run # -########## - -cat("---------- Settings ----------\n") -sys.source(env$source_settings, env) - - -cat("---------- Analysis ----------\n") -sys.source(env$source_analyze, env) - - -cat("---------- Output ----------\n") -sys.source(env$source_output, env) - - -########## -# end # -########## - -# delete temporary files -setwd("..") -unlink(tmpwd, recursive = TRUE)
--- a/Testtest/GCMS-test.xml Thu Nov 23 08:50:14 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,263 +0,0 @@ -<tool id="GCMSt" name="GCMS-test" version="17.11"> - <description>Test parameters for GC-MS data preprocessing using metaMS package</description> - - <requirements> - <requirement type="package" version="1.8.0">bioconductor-metams</requirement> - <requirement type="package" version="1.46.0">bioconductor-xcms</requirement> - <requirement type="package" version="1.1_4">r-batch</requirement> - </requirements> - - <stdio> - <exit_code range="1:" level="fatal" /> - </stdio> - - <command> - Rscript $__tool_directory__/GCMS-test.R - - <!-- Inputs --> - - <!-- nb_cores \${GALAXY_SLOTS:-1} --> - nb_cores \${GALAXY_SLOTS} - - zip_file $zip_file - - step_min $step.step_min - step_max $step.step_max - step_step $step.step_step - - steps_min $steps.steps_min - steps_max $steps.steps_max - steps_step $steps.steps_step - - mzdiff_min $mzdiff.mzdiff_min - mzdiff_max $mzdiff.mzdiff_max - mzdiff_step $mzdiff.mzdiff_step - - fwhm_min $fwhm.fwhm_min - fwhm_max $fwhm.fwhm_max - fwhm_step $fwhm.fwhm_step - - simthresh_min $simthresh.simthresh_min - simthresh_max $simthresh.simthresh_max - simthresh_step $simthresh.simthresh_step - - snthresh_min $snthresh.snthresh_min - snthresh_max $snthresh.snthresh_max - snthresh_step $snthresh.snthresh_step - - max_min $max.max_min - max_max $max.max_max - max_step $max.max_step - - minclassfraction_min $minclassfraction.minclassfraction_min - minclassfraction_max $minclassfraction.minclassfraction_max - minclassfraction_step $minclassfraction.minclassfraction_step - - minclasssize_min $minclasssize.minclasssize_min - minclasssize_max $minclasssize.minclasssize_max - minclasssize_step $minclasssize.minclasssize_step - - rtdiff_min $rtdiff.rtdiff_min - rtdiff_max $rtdiff.rtdiff_max - rtdiff_step $rtdiff.rtdiff_step - - <!--minfeat_min $minfeat.minfeat_min - minfeat_max $minfeat.minfeat_max - minfeat_step $minfeat.minfeat_step--> - - count_duplicates $outputs_options.count_duplicates.count_duplicates_bool - #if $outputs_options.count_duplicates.count_duplicates_bool: - duplicates_delta_rt $outputs_options.count_duplicates.duplicates_delta_rt - duplicates_delta_mz $outputs_options.count_duplicates.duplicates_delta_mz - #end if - - check_ions $outputs_options.check_ions.check_ions_bool - #if $outputs_options.check_ions.check_ions_bool: - ions_delta_rt $outputs_options.check_ions.ions_delta_rt - ions_delta_mz $outputs_options.check_ions.ions_delta_mz - - #for $i, $s in enumerate($outputs_options.check_ions.ions_to_check) - ions_name_${i+1} $s.name - ions_rt_${i+1} $s.rt - ions_mzs_${i+1} $s.mzs - #end for - #end if - - <!-- Outputs --> - - summary_out $summary_out - peakspectra_out $peakspectra_out - intensity_graph_out $intensity_graph_out - - #if $outputs_options.count_duplicates.count_duplicates_bool: - duplicates_out $duplicates_out - #end if - - compare_graph_out $compare_graph_out - </command> - - <inputs> - <param name="zip_file" type="data" format="no_unzip.zip,zip" label="Zip file" help="Zip file containing the chromatograms to analyse" /> - - <section name="fwhm" title="FWHM (default: 6)" help="The full width at half maximum of the peaks" > - <param name="fwhm_min" value="6" type="float" label="From" /> - <param name="fwhm_max" value="6" type="float" label="To" /> - <param name="fwhm_step" value="1" type="float" label="Step" /> - </section> - - <section name="max" title="Max (default: 500)" help="Maximum number of peaks per extracted ion chromatogram" > - <param name="max_min" value="500" type="integer" label="From" /> - <param name="max_max" value="500" type="integer" label="To" /> - <param name="max_step" value="1" type="integer" label="Step" /> - </section> - - <section name="minclassfraction" title="Minimum class fraction (default: 0.5)" help="The fraction of samples in which a pseudospectrum is present before it is regarded as an unknown" > - <param name="minclassfraction_min" value="0.5" type="float" label="From" /> - <param name="minclassfraction_max" value="0.5" type="float" label="To" /> - <param name="minclassfraction_step" value="0.5" type="float" label="Step" /> - </section> - - <section name="minclasssize" title="Minimum class size (default: 5)" help="The absolute number of samples in which a pseudospectrum is present before it is regarded as an unknown" > - <param name="minclasssize_min" value="5" type="integer" label="From" /> - <param name="minclasssize_max" value="5" type="integer" label="To" /> - <param name="minclasssize_step" value="1" type="integer" label="Step" /> - </section> - - <!-- <section name="minfeat" title="minfeat (default: 0.05)" help="The minimum number of ion in a mass spectra to consider it a molecule" > - <param name="minfeat_min" value="0.05" type="float" label="From" /> - <param name="minfeat_max" value="0.05" type="float" label="To" /> - <param name="minfeat_step" value="0.01" type="float" label="Step" /> - </section> --> - - <section name="mzdiff" title="m/z diff (default: 0.05)" help="Minimum difference in m/z for peaks with overlapping retention times" > - <param name="mzdiff_min" value="0.05" type="float" label="From" /> - <param name="mzdiff_max" value="0.05" type="float" label="To" /> - <param name="mzdiff_step" value="0.01" type="float" label="Step" /> - </section> - - <section name="rtdiff" title="RT diff (default: 0.05)" help="The allowed RT shift between same molecules in different samples" > - <param name="rtdiff_min" value="0.05" type="float" label="From" /> - <param name="rtdiff_max" value="0.05" type="float" label="To" /> - <param name="rtdiff_step" value="0.01" type="float" label="Step" /> - </section> - - <section name="simthresh" title="Similarity threshold (default: 0.7)" help="The minimum similarity allowed between peaks' mass spectra to be considered as equal" > - <param name="simthresh_min" value="0.7" type="float" label="From" /> - <param name="simthresh_max" value="0.7" type="float" label="To" /> - <param name="simthresh_step" value="0.1" type="float" label="Step" /> - </section> - - <section name="snthresh" title="s/n threshold (default: 2)" help="Signal to noise ratio cutoff" > - <param name="snthresh_min" value="2" type="integer" label="From" /> - <param name="snthresh_max" value="2" type="integer" label="To" /> - <param name="snthresh_step" value="1" type="integer" label="Step" /> - </section> - - <section name="step" title="Step (default: 0.05)" help="Step size to use for profile generation" > - <param name="step_min" value="0.05" type="float" label="From" /> - <param name="step_max" value="0.05" type="float" label="To" /> - <param name="step_step" value="0.01" type="float" label="Step" /> - </section> - - <section name="steps" title="Steps (default: 2)" help="Number of steps to merge prior to filtration" > - <param name="steps_min" value="2" type="integer" label="From" /> - <param name="steps_max" value="2" type="integer" label="To" /> - <param name="steps_step" value="1" type="integer" label="Step" /> - </section> - - <section name="outputs_options" title="Output options" expanded="True" > - <conditional name="count_duplicates" > - <param name="count_duplicates_bool" type="boolean" checked="false" label="Count the duplicates?" help="Counting the number of duplicates for each test slows the process down" /> - <when value="true" > - <param name="duplicates_delta_rt" value="0.1" type="float" label="Delta rt" help="Maximum difference in two ions' m/z to consider them as equal" /> - <param name="duplicates_delta_mz" value="0.1" type="float" label="Delta m/z" help="Maximum difference in two ions' rt to consider them as equal" /> - </when> - <when value="false" /> - </conditional> - - <conditional name="check_ions" > - <param name="check_ions_bool" type="boolean" checked="false" label="Check the presence of ions?" help="Checking the presence of ions for each test slows the process down" /> - <when value="true" > - <param name="ions_delta_rt" value="0.1" type="float" label="Delta rt" help="Maximum difference in two ions' m/z to consider them as equal" /> - <param name="ions_delta_mz" value="0.5" type="float" label="Delta m/z" help="Maximum difference in two ions' rt to consider them as equal" /> - <repeat name="ions_to_check" title="Check ion" min="1" help="Add as many ions as necessary" > - <param name="name" value="Valine" type="text" label="Name" help="Name to be displayed. Ex: Valine" /> - <param name="rt" value="9.67" type="float" label="Retention time" help="The ion's retention time. Ex: 9.67" /> - <param name="mzs" value="218.1105,219.1146" type="text" label="Mass-to-charge ratio for each isotope" help="List of the isotopes' m/zs separated by commas, without spaces. Ex: 218.1105,219.1146"/> - </repeat> - </when> - <when value="false" /> - </conditional> - </section> - </inputs> - - <outputs> - <data name="summary_out" label="summary.tsv" format="tabular" /> - <data name="peakspectra_out" label="peakspectra.zip" format="zip" /> - <data name="intensity_graph_out" label="intensity_graph.pdf" format="pdf" /> - <data name="duplicates_out" label="duplicates.zip" format="zip" > - <filter>outputs_options['count_duplicates']['count_duplicates_bool']</filter> - </data> - <data name="compare_graph_out" label="compare_graph.pdf" format="pdf" /> - </outputs> - - <help> - -**Author** Pauline Ribeyre - -========= -GCMS-test -========= - ------------ -Description ------------ - -This tool allows the user to run the method metaMS.runGC() with several sets of parameters and to compare the results of each run. - ------------------ -Workflow position ------------------ - -**Upstream tools** - -You must start from here. - -**Downstream tools** - -+---------------------------+---------------------------------------+--------+ -| Name | Output file | Format | -+===========================+=======================================+========+ -| | | | -+---------------------------+---------------------------------------+--------+ - ----------- -Parameters ----------- - -The **zip file** contains the data (the chromatograms to analyse with metaMS.runGC). - -Each section allows the user to vary one parameter by choosing a start value, a stop value and a step for this parameter. - -The last section allows the user to set output preferences. - -------- -Outputs -------- - -The output file **summary.tsv** is a tabular file. It contains the summary of the tests. - -The output file **peakspectra.zip** is a zip file. It contains the .msp and .tsv peakspectra files for each test. - -The output file **intensity_graph.zip** is a pdf file. It contains the "number of ions vs intensity range" graph for each test. - -The output file **duplicates.zip** is a zip file. It contains the .tsv duplicates file for each test. - ---------- -Changelog ---------- - -**Version 1.0 - 21/07/2017** - - </help> -</tool> \ No newline at end of file
--- a/Testtest/GCMS-test_analyze.R Thu Nov 23 08:50:14 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,149 +0,0 @@ -# author: Pauline Ribeyre - - -##################### -# required packages # -##################### - -library("metaMS") # provides "runGC" function - - -############ -# analysis # -############ - -peakspectra_table <- function(GC_results, file_title) { - # Saves the pseudospectra in 2 files (text and tabular formats). - - names <- c() - rts <- c() - rt.sds <- c() - mzs_df <- list() - for (ps in GC_results$PseudoSpectra) { - - name <- ps$Name - rt <- ps$rt - rt.sd <- ps$rt.sd - - names <- c(names, name) - rts <- c(rts, rt) - rt.sds <- c(rt.sds, rt.sd) - - spectrum <- data.frame(ps$pspectrum) - mz <- c() - maxo <- c() - for (i in 1:nrow(spectrum)) { - ion <- spectrum[i,] - mz <- c(mz, ion$mz) - maxo <- c(maxo, ion$maxo) - } - df <- data.frame(name, rt, rt.sd, mz, maxo) - mzs_df[[length(mzs_df) + 1]] <- df - - } - - df = do.call(rbind, mzs_df) - - write.table(df, - file = file_title, - quote = FALSE, - row.names = FALSE, - sep = "\t") - -} - - -my_runGC <- function(n, cdf_files, titles_to_test, settings_to_test) { - # Runs the data analysis and records the results. - # - # Args: - # n: index of the current test, to select the corresponding title and settings set. - # cdf_files: list of the data files' names. - # titles_to_test: list of titles (one for each settings set) (concatenation of the values taken by the varied parameters). - # settings_to_test: list of settings sets for runGC. - - library("metaMS") - - settings <- settings_to_test[n][[1]] - title <- titles_to_test[n] - - print(title) - - if (!file.exists(paste0("Peak_tables/", title, ".tsv"))) { - - # run - GC_results <- runGC(files = cdf_files, settings = settings, returnXset = TRUE, nSlaves = 20) - - # order the result table by retention time - peak_table <- GC_results$PeakTable <- GC_results$PeakTable[order(GC_results$PeakTable[,"rt"]),] - peak_table_values <- peak_table[,5:(5 + length(cdf_files) - 1)] - peak_table$nb_zeros <- apply(peak_table_values, 1, function(x) sum(x == 0)) - zeros_per_l <- sum(peak_table$nb_zeros)/nrow(peak_table) - - # record the table in a file - file_title <- paste0("Peak_tables/", title, ".tsv") - write.table(peak_table, file = file_title, sep = "\t", row.names = FALSE) # /!\ title length -> cannot open connexion - - # record the RData - file_title <- paste0("RDatas/", title, ".RData") - # save(GC_results, settings, file = file_title) - save(GC_results, file = file_title) - - # record the pseudospectra in files (.msp and .tsv) - file_title <- paste0("Pseudospectra/", title, ".msp") - write.msp(GC_results$PseudoSpectra, file = file_title, newFile = TRUE) - file_title <- paste0("Pseudospectra/", title, ".tsv") - peakspectra_table(GC_results, file_title) - - cat(paste(zeros_per_l, "zeros per line.\n\n")) - - } # end if - -} - - -runGC_vary_parameters_parallel <- function(nb_cores, cdf_files, settings, vary) { - # Calculates the number of sets of paramaters and runs the analysis on several cores. - # - # Args: - # nb_cores: maximum number of cores to use. - # cdf_files: list of the data files' names. - # settings: default settings for runGC(). - # vary: list of parameters to vary and the values each parameter must take. - - # calculate the number of possibilities with the parameters' ranges - nb_possibilites <- 1 - for (param in vary) { - range_param <- param[3:length(param)] - nb_possibilites <- nb_possibilites * length(range_param) - } - cat("Settings variations:", nb_possibilites, "combinations.\n") - - dir.create("RDatas", showWarnings = FALSE) #create the folder where the RDatas will be saved - dir.create("Peak_tables", showWarnings = FALSE) #create the folder where the peak tables will be saved - # dir.create("Ions_per_intensity", showWarnings = FALSE) #create the folder where the nb of ions per intensity range will be saved - dir.create("Pseudospectra", showWarnings = FALSE) #create the folder where the pseudospectra will be saved - - time.start <- Sys.time() # start the timer - - # run the function on several cores - if (length(titles_to_test) < nb_cores) - nb_cores <- length(titles_to_test) - cluster <- makeCluster(nb_cores)#, outfile = "") - parLapplyLB(cluster, 1:length(titles_to_test), my_runGC, cdf_files, titles_to_test, settings_to_test) - print(titles_to_test) - stopCluster(cluster) - - time.end <- Sys.time() # stop the timer - Tdiff <- difftime(time.end, time.start) - print(Tdiff) - -} - - -############ -# main # -############ - -runGC_vary_parameters_parallel(nb_cores, cdf_files, settings, vary_list) -
--- a/Testtest/GCMS-test_settings.R Thu Nov 23 08:50:14 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,227 +0,0 @@ -# author: Pauline Ribeyre - - -##################### -# required packages # -##################### - -library("metaMS") # provides "metaMSsettings" class - - -#################### -# default settings # -#################### - -# PeakPicking -PeakPicking.method = "matchedFilter" -PeakPicking.step = 0.05 -PeakPicking.steps = 2 -PeakPicking.mzdiff = 0.5 -PeakPicking.fwhm = 5 -PeakPicking.snthresh = 2 -PeakPicking.max = 500 - -# Alignment -# ? - -# CAMERA -CAMERA.perfwhm = 1 - -# DBconstruction -DBconstruction.minintens = 0.0 -DBconstruction.rttol = 0.1 -DBconstruction.intensityMeasure = "maxo" -DBconstruction.DBthreshold = .80 -DBconstruction.minfeat = 5 - -# match2DB -match2DB.rtdiff = 0.5 -match2DB.minfeat = 5 -# match2DB.rtval = ? -# match2DB.mzdiff = ? -# match2DB.ppm = ? -match2DB.simthresh = 0.8 -match2DB.timeComparison = "rt" -match2DB.RIdiff = 5 - -# betweenSamples -betweenSamples.min.class.fraction = 0.8 # pools -# betweenSamples.min.class.fraction = 0.5 # samples -betweenSamples.min.class.size = 5 -betweenSamples.timeComparison = "rt" -betweenSamples.rtdiff = 0.5 -betweenSamples.RIdiff = 2 # default setting -# betweenSamples.RIdiff = 0.05 # w4m setting -betweenSamples.simthresh = 0.8 - -# matchIrrelevants -# matchIrrelevants.irrelevantClasses = c("Bleeding", "Plasticizers") -# matchIrrelevants.RIdiff = ? -# matchIrrelevants.rtdiff = ? -# matchIrrelevants.simthresh = ? -# matchIrrelevants.timeComparison = "RI" - -# TODO RI option - - -################### -# settings object # -################### - -# settings <- metaMSsettings("protocolName" = "settings", "chrom" = "GC") - -settings <- metaMSsettings("protocolName" = "settings", - "chrom" = "GC", - - PeakPicking = list( - method = PeakPicking.method, - step = PeakPicking.step, - steps = PeakPicking.steps, - mzdiff = PeakPicking.mzdiff, - fwhm = PeakPicking.fwhm, - snthresh = PeakPicking.snthresh, - max = PeakPicking.max), - - CAMERA = list(perfwhm = CAMERA.perfwhm)) - -metaSetting(settings, "DBconstruction") <- list( - minintens = DBconstruction.minintens, - rttol = DBconstruction.rttol, - intensityMeasure = DBconstruction.intensityMeasure, - DBthreshold = DBconstruction.DBthreshold, - minfeat = DBconstruction.minfeat) - -metaSetting(settings, "match2DB") <- list( - simthresh = match2DB.simthresh, - timeComparison = match2DB.timeComparison, - rtdiff = match2DB.rtdiff, - RIdiff = match2DB.RIdiff, - minfeat = match2DB.minfeat) - -metaSetting(settings, "betweenSamples") <- list( - min.class.fraction = betweenSamples.min.class.fraction, - min.class.size = betweenSamples.min.class.size, - timeComparison = betweenSamples.timeComparison, - rtdiff = betweenSamples.rtdiff, - RIdiff = betweenSamples.RIdiff, - simthresh = betweenSamples.simthresh) - -# metaSetting(GALAXY.GC, "matchIrrelevants") <- list( -# irrelevantClasses = matchIrrelevants.irrelevantClasses, -# RIdiff = matchIrrelevants.RIdiff, -# rtdiff = matchIrrelevants.rtdiff, -# simthresh = matchIrrelevants.simthresh, -# timeComparison = matchIrrelevants.timeComparison) - - -# best settings pool -settings@betweenSamples.min.class.size = 5 -settings@PeakPicking$fwhm = 6 -settings@PeakPicking$mzdiff = -0.5 -settings@betweenSamples.min.class.fraction = 0.8 -settings@betweenSamples.simthresh = 0.7 -settings@betweenSamples.rtdiff = 0.05 - - -# best settings samples -settings@betweenSamples.min.class.size = 5 -settings@PeakPicking$fwhm = 6 -settings@PeakPicking$mzdiff = 0.5 -settings@betweenSamples.min.class.fraction = 0.5 -settings@betweenSamples.simthresh = 0.7 -settings@betweenSamples.rtdiff = 0.05 - - -################# -# vary settings # -################# - -pop <- function(list) { - # Removes the last item from a list. - # - # Args: - # list: the list to pop. - # - # Returns: - # The last item of the list before it was removed. - - val <- list[length(list)] - assign(as.character(substitute(list)), list[-length(list)], parent.frame()) - return (val) - -} - - -vary_list_to_settings <- function(settings, vary_list, title = "", titles_to_test = c(), settings_to_test = c()) { - # Reads the list of parameters to vary and their ranges and builds a list of file titles and settings from it. - # - # Args: - # settings: default settings for runGC. - # vary_list: list of parameters to vary and the values each parameter must take. - # title: title of the current file (concatenation of the values taken by the varied parameters). - # titles_to_test: list of titles (one for each settings set) (concatenation of the values taken by the varied parameters). - # settings_to_test: list of settings sets for runGC. - # - # Returns: - # A list containing: - # 1: the list of file titles ; - # 2: the list of runGC() settings. - - if (length(vary_list) == 0) { # if there are no parameters to vary - - titles_to_test <- c(titles_to_test, "default_settings") - settings_to_test <- c(settings_to_test, settings) - - } - - else { - - param <- pop(vary_list)[[1]] - - title <- paste0(title, param[1]) - setting <- param[2] - range <- as.numeric(param[3:length(param)]) - - # for each possible value of the parameter - for (p in range) { - - # change the settings - isSlot <- regexpr("\\$", setting)[1] - - if (isSlot == -1) { # the parameter is a slot in class metaMSsettings - slot(settings, setting) <- p - } - else { # the parameter is not a slot in class metaMSsettings - slot(settings, substr(setting, 1, isSlot - 1))[substr(setting, isSlot + 1, nchar(setting))] <- p - } - - new_title <- paste0(title, "=", p, "_") - - # save the settings in a list - if (length(vary_list) == 0) { - titles_to_test <- c(titles_to_test, new_title) - settings_to_test <- c(settings_to_test, settings) - } - else { - res <- vary_list_to_settings(settings, vary_list, new_title, titles_to_test, settings_to_test) - titles_to_test <- res[[1]] - settings_to_test <- res[[2]] - } - - } # end for - - } # end else - - return (list(titles_to_test, settings_to_test)) - -} - - -############ -# main # -############ - -# settings to vary and ranges of values -res <- vary_list_to_settings(settings, vary_list) -titles_to_test <- res[[1]] -settings_to_test <- res[[2]]
--- a/Testtest/GCMS-test_spectrum.R Thu Nov 23 08:50:14 2017 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,135 +0,0 @@ -# author: Pauline Ribeyre - - -############# -# find ions # -############# - -is_ion_present <- function(GC_results, rt, mz_list) { - # Looks for a ion in the pseudospectra. - # - # Args: - # GC_results: result of a test by runGC(). - # rt: retention time of the ion to look for. - # mz_list: list of the ion's isotopes' m/z. - # - # Returns: - # The number of times the ion was found in the pseudospectra. - - delta_rt <- ions_delta_rt - delta_mz <- ions_delta_mz - - res = 0 - - pseudospectra <- GC_results$PseudoSpectra - for (peak in pseudospectra) { # check all the peaks - if (peak$rt >= rt - delta_rt && peak$rt <= rt + delta_rt) { # check if the retention time matches - - ps <- peak$pspectrum - found_ion <- rep(NA, length(mz_list)) - - for (i in 1:length(mz_list)) { # check the presence of all the ions given in parameter - - ion <- mz_list[i][[1]] - found_ion[i] <- sum((ps[,"mz"] >= ion - delta_mz) & (ps[,"mz"] <= ion + delta_mz)) - - } # end for - - if (length(which(found_ion > 1)) > 0) { - stop("The ion was found more than 1 time in the same pseudospectrum.") - } - - if (sum(found_ion) == length(mz_list)) { - res <- res + 1 - } - - } # end if - } # end for - - return (res) - -} - - -############## -# duplicates # -############## - -count_duplicates_function <- function(GC_results) { - # Counts the number of ions that have been found in more than one pseudospectra (slow). - # - # Args: - # GC_results: result of a test by runGC(). - # - # Returns: - # The number of duplicates. - - delta_rt <- duplicates_delta_rt - delta_mz <- duplicates_delta_mz - - pseudospectra <- GC_results$PseudoSpectra - - # order by pseudospectrum rt - rts <- sapply(pseudospectra, function(x) x$rt) - rts_order <- order(rts) - - duplicates <- NULL # record all the duplicates we find - already_counted <- NULL # record which duplicates have already been counted - - for (pspec in 1:length(rts)) { # for each pseudospectrum - - i <- rts_order[pspec] - this_rt <- rts[i] - - # number of following pseudospectra to consider - k <- 0 - while (pspec + k + 1 < length(rts_order) && rts[rts_order[pspec + k + 1]] < this_rt + delta_rt) { # check if the retention time matches - k <- k + 1 - } - - mzs <- lapply(pseudospectra[rts_order[pspec:(pspec + k)]], function(x) x$pspectrum) - mzs_sorted <- NULL - for (j in 1:(k + 1)) { - mzs_sorted <- rbind(mzs_sorted, t(sapply(1:length(mzs[[j]][,1]), function(n) c(rts[rts_order[pspec + j - 1]], mzs[[j]][,1][n])))) - } - mzs_sorted <- mzs_sorted[order(mzs_sorted[,2]),] # column 1: rt ; column 2: m/z - - i <- 1 - while (i <= nrow(mzs_sorted)) { # for each m/z - - j <- 1 - this_already_counted <- nrow(merge(data.frame(t(mzs_sorted[i,])), data.frame(already_counted))) > 0 - - if (is.null(already_counted) || !this_already_counted) { - this_mz <- mzs_sorted[i,2] - already_counted <- rbind(already_counted, mzs_sorted[i,]) - same_ion <- 1 # count how many times this m/z appears - - while (j <= nrow(mzs_sorted) - i && mzs_sorted[i+j,2] >= this_mz - delta_mz && mzs_sorted[i+j,2] <= this_mz + delta_mz) { # check if the m/z matches - this_already_counted <- nrow(merge(data.frame(t(mzs_sorted[i+j,])), data.frame(already_counted))) > 0 - - if (!this_already_counted) { # do not count a duplicate more than once - same_ion <- same_ion + 1 - already_counted <- rbind(already_counted, mzs_sorted[i+j,]) # record which duplicates have already been counted - this_mz <- mzs_sorted[i+j,2] - } - - j <- j + 1 - } - - # first/min m/z, last/max m/z, rt where we started, number of duplicates - if (same_ion > 1) - duplicates <- rbind(duplicates, c(mzs_sorted[i,2], this_mz, mzs_sorted[i,1], same_ion)) - } - i <- i + j - } - - } - - if (is.null(duplicates)) - duplicates <- c(0, 0, 0, 0) - - return (duplicates) - -} -