view Testtest/GCMS-test_spectrum.R @ 0:40de28c7d3fb draft

Uploaded
author melpetera
date Thu, 23 Nov 2017 08:50:14 -0500
parents
children
line wrap: on
line source

# author: Pauline Ribeyre


#############
# find ions #
#############

is_ion_present <- function(GC_results, rt, mz_list) {
  # Looks for a ion in the pseudospectra.
  #
  # Args:
  #   GC_results: result of a test by runGC().
  #   rt: retention time of the ion to look for.
  #   mz_list: list of the ion's isotopes' m/z.
  #
  # Returns:
  #   The number of times the ion was found in the pseudospectra.
  
  delta_rt <- ions_delta_rt
  delta_mz <- ions_delta_mz
  
  res = 0
  
  pseudospectra <- GC_results$PseudoSpectra
  for (peak in pseudospectra) { # check all the peaks
    if (peak$rt >= rt - delta_rt && peak$rt <= rt + delta_rt) { # check if the retention time matches
      
      ps <- peak$pspectrum
      found_ion <- rep(NA, length(mz_list))
      
      for (i in 1:length(mz_list)) { # check the presence of all the ions given in parameter
        
        ion <- mz_list[i][[1]]
        found_ion[i] <- sum((ps[,"mz"] >= ion - delta_mz) & (ps[,"mz"] <= ion + delta_mz))
        
      } # end for
      
      if (length(which(found_ion > 1)) > 0) {
        stop("The ion was found more than 1 time in the same pseudospectrum.")
      }
      
      if (sum(found_ion) == length(mz_list)) {
        res <- res + 1
      }
      
    } # end if
  } # end for
  
  return (res)
  
}


##############
# duplicates #
##############

count_duplicates_function <- function(GC_results) {
  # Counts the number of ions that have been found in more than one pseudospectra (slow).
  #
  # Args:
  #   GC_results: result of a test by runGC().
  #
  # Returns:
  #   The number of duplicates.
  
  delta_rt <- duplicates_delta_rt
  delta_mz <- duplicates_delta_mz
  
  pseudospectra <- GC_results$PseudoSpectra
  
  # order by pseudospectrum rt
  rts <- sapply(pseudospectra, function(x) x$rt)
  rts_order <- order(rts)
  
  duplicates <- NULL      # record all the duplicates we find
  already_counted <- NULL # record which duplicates have already been counted
  
  for (pspec in 1:length(rts)) { # for each pseudospectrum
    
    i <- rts_order[pspec]
    this_rt <- rts[i]
    
    # number of following pseudospectra to consider
    k <- 0
    while (pspec + k + 1 <  length(rts_order) && rts[rts_order[pspec + k + 1]] < this_rt + delta_rt) { # check if the retention time matches
      k <- k + 1
    }
    
    mzs <- lapply(pseudospectra[rts_order[pspec:(pspec + k)]], function(x) x$pspectrum)
    mzs_sorted <- NULL
    for (j in 1:(k + 1)) {
      mzs_sorted <- rbind(mzs_sorted, t(sapply(1:length(mzs[[j]][,1]), function(n) c(rts[rts_order[pspec + j - 1]], mzs[[j]][,1][n]))))
    }
    mzs_sorted <- mzs_sorted[order(mzs_sorted[,2]),] # column 1: rt ; column 2: m/z
    
    i <- 1
    while (i <= nrow(mzs_sorted)) { # for each m/z
      
      j <- 1
      this_already_counted <- nrow(merge(data.frame(t(mzs_sorted[i,])), data.frame(already_counted))) > 0
      
      if (is.null(already_counted) || !this_already_counted) {
        this_mz <- mzs_sorted[i,2]
        already_counted <- rbind(already_counted, mzs_sorted[i,])
        same_ion <- 1 # count how many times this m/z appears
        
        while (j <= nrow(mzs_sorted) - i && mzs_sorted[i+j,2] >= this_mz - delta_mz && mzs_sorted[i+j,2] <= this_mz + delta_mz) { # check if the m/z matches
          this_already_counted <- nrow(merge(data.frame(t(mzs_sorted[i+j,])), data.frame(already_counted))) > 0
          
          if (!this_already_counted) { # do not count a duplicate more than once
            same_ion <- same_ion + 1
            already_counted <- rbind(already_counted, mzs_sorted[i+j,]) # record which duplicates have already been counted
            this_mz <- mzs_sorted[i+j,2]
          }
          
          j <- j + 1
        }
        
        # first/min m/z, last/max m/z, rt where we started, number of duplicates
        if (same_ion > 1)
          duplicates <- rbind(duplicates, c(mzs_sorted[i,2], this_mz, mzs_sorted[i,1], same_ion))
      }
      i <- i + j
    }
    
  }
  
  if (is.null(duplicates))
    duplicates <- c(0, 0, 0, 0)
  
  return (duplicates)
  
}