Mercurial > repos > tomnl > cameradims

--- a/flag-remove-peaks.R	Mon May 14 08:13:47 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,184 +0,0 @@
-library(XCMSwrapper)
-library(optparse)
-
-option_list <- list(
-  make_option(c("-o", "--out_dir"), type="character", default=getwd(),
-              help="Output folder for resulting files [default = %default]"
-  ),
-  make_option(c("-x", "--xset_path"), type="character", default=file.path(getwd(),"xset.rds"),
-              help="The path to the xcmsSet object [default = %default]"
-  ),
-  make_option("--polarity", default='NA',
-              help="polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]"
-  ),
-  make_option("--rsd_i_blank", default=NA,
-              help="RSD threshold for the blank [default = %default]"
-  ),
-  make_option("--minfrac_blank", default=0.5,
-              help="minimum fraction of files for features needed for the blank [default = %default]"
-  ),
-  make_option("--rsd_rt_blank", default=NA,
-              help="RSD threshold for the RT of the blank [default = %default]"
-  ),
-
-  make_option("--ithres_blank", default=NA,
-              help="Intensity threshold for the blank [default = %default]"
-  ),
-  make_option("--s2b", default=10,
-              help="fold change (sample/blank) needed for sample peak to be allowed. e.g.
-                    if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10.
-                    1000/10 = 100, so sample has fold change higher than the threshold and the peak
-                    is not considered a blank [default = %default]"
-  ),
-  make_option("--blank_class", default='blank', type="character",
-              help="A string representing the class that will be used for the blank.[default = %default]"
-  ),
-  make_option("--egauss_thr", default=NA,
-              help="Threshold for filtering out non gaussian shaped peaks. Note this only works
-                            if the 'verbose columns' and 'fit gauss' was used with xcms
-                            [default = %default]"
-  ),
-  make_option("--rsd_i_sample", default=NA,
-              help="RSD threshold for the samples [default = %default]"
-  ),
-  make_option("--minfrac_sample", default=0.8,
-              help="minimum fraction of files for features needed for the samples [default = %default]"
-  ),
-  make_option("--rsd_rt_sample", default=NA,
-              help="RSD threshold for the RT of the samples [default = %default]"
-  ),
-  make_option("--ithres_sample", default=5000,
-              help="Intensity threshold for the sample [default = %default]"
-  ),
-  make_option("--grp_rm_ids", default=NA,
-              help="vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output)
-              [default = %default]"
-  ),
-  make_option("--remove_spectra",  action="store_true",
-              help=" TRUE if flagged spectra is to be removed [default = %default]"
-  ),
-  make_option("--minfrac_xcms", default=0.5,
-              help="minfrac for xcms  grouping [default = %default]"
-  ),
-  make_option("--mzwid", default=0.001,
-              help="mzwid for xcms  grouping [default = %default]"
-  ),
-  make_option("--bw", default=5,
-              help="bw for xcms  grouping [default = %default]"
-  ),
-
-  make_option("--temp_save",  action="store_true",
-              help="Assign True if files for each step saved (for testing purposes) [default = %default]"
-  ),
-
-  make_option("--samplelist",  type="character", help="Sample list to determine the blank class"),
-
-  make_option("--xset_name",  default="xset",
-              help="Name of the xcmsSet object within the RData file [default = %default]"
-  )
-
-
-
-)
-
-  #make_option("--multilist", action="store_true"
-  #            help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist -  sample list file has to be provided"
-  #),
-
-# store options
-opt<- parse_args(OptionParser(option_list=option_list))
-
-if (is.null(opt$temp_save)){
-    temp_save<-FALSE
-}else{
-    temp_save<-TRUE
-}
-
-if (is.null(opt$remove_spectra)){
-    remove_spectra<-FALSE
-}else{
-    remove_spectra<-TRUE
-}
-
-
-print(opt)
-
-loadRData <- function(rdata_path, xset_name){
-#loads an RData file, and returns the named xset object if it is there
-    load(rdata_path)
-    return(get(ls()[ls() == xset_name]))
-}
-
-xset<-loadRData(opt$xset_path, opt$xset_name)
-print(xset)
-if (is.null(opt$samplelist)){
-    blank_class <- opt$blank_class
-}else{
-    samplelist <- read.table(opt$samplelist, sep='\t', header=TRUE)
-    samplelist_blank <- unique(samplelist$sample_class[samplelist$blank=='yes'])
-
-    chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class]
-    if (length(chosen_blank)>1){
-        print('ERROR: only 1 blank is currently allowed to be used with this tool')
-        exit()
-    }
-    blank_class <- as.character(chosen_blank)
-    print(blank_class)
-}
-
-
-if (is.null(opt$multilist)){
-    ffrm_out <- XCMSwrapper::flag_remove(xset,
-                        pol=opt$polarity,
-                        rsd_i_blank=opt$rsd_i_blank,
-                        minfrac_blank=opt$minfrac_blank,
-                        rsd_rt_blank=opt$rsd_rt_blank,
-                        ithres_blank=opt$ithres_blank,
-                        s2b=opt$s2b,
-                        ref.class=blank_class,
-                        egauss_thr=opt$egauss_thr,
-                        rsd_i_sample=opt$rsd_i_sample,
-                        minfrac_sample=opt$minfrac_sample,
-                        rsd_rt_sample=opt$rsd_rt_sample,
-                        ithres_sample=opt$ithres_sample,
-                        minfrac_xcms=opt$minfrac_xcms,
-                        mzwid=opt$mzwid,
-                        bw=opt$bw,
-                        out_dir=opt$out_dir,
-                        temp_save=temp_save,
-                        remove_spectra=remove_spectra,
-                        grp_rm_ids=unlist(strsplit(as.character(opt$grp_rm_ids), split=", "))[[1]])
-
-    xset <- ffrm_out[[1]]
-    grp_peaklist <- ffrm_out[[2]]
-    removed_peaks <- ffrm_out[[3]]
-
-    save.image(file=file.path(opt$out_dir, 'xset_filtered.RData'))
-
-    # grpid needed for mspurity ID needed for deconrank... (will clean up at some up)
-    write.table(data.frame('grpid'=rownames(grp_peaklist), 'ID'=rownames(grp_peaklist), grp_peaklist),
-                file.path(opt$out_dir, 'peaklist_filtered.txt'), row.names=FALSE, sep='\t')
-
-    removed_peaks <- data.frame(removed_peaks)
-    write.table(data.frame('ID'=rownames(removed_peaks),removed_peaks),
-        file.path(opt$out_dir, 'removed_peaks.txt'), row.names=FALSE, sep='\t')
-
-}else{
-
-
-   # TODO
-   #xsets <- split(xset, multilist_df$multlist)
-   #
-   #mult_grps <- unique(multilist_df$multlist)
-   #
-   #for (mgrp in mult_grps){
-   #   xset_i <- xsets[mgrp]
-   #   xcms::group(xset_i,
-   #
-   # }
-
-
-
-}
-
-
--- a/msp_split.py	Mon May 14 08:13:47 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,73 +0,0 @@
-from __future__ import print_function
-import argparse
-import textwrap
-import os
-import re
-import csv
-import math
-
-def msp_split(i, o, n):
-    spec_total = lcount('NAME', i)
-    spec_lim = math.ceil(spec_total/float(n))
-    spec_c = 0
-    filelist = []
-    header = ''
-    print('spec_lim', spec_lim)
-    with open(i, 'r') as msp_in:
-        for i in range(1, n+1):
-            with open(os.path.join(o, 'file{}.msp'.format(str(i).zfill(len(str(n))))), 'w+') as msp_out:
-                while spec_c <= spec_lim:
-                    if header:
-                        msp_out.write(header)
-                        header = ''
-                    line = msp_in.readline()
-
-                    if not line:
-                        break  # end of file
-
-                    if re.match('^NAME:.*$', line, re.IGNORECASE):
-                        header = line
-                        spec_c += 1
-                    else:
-                        msp_out.write(line)
-                spec_c = 1
-
-    return filelist
-
-def lcount(keyword, fname):
-    with open(fname, 'r') as fin:
-        return sum([1 for line in fin if keyword in line])
-
-def main():
-
-    p = argparse.ArgumentParser(prog='PROG',
-                                formatter_class=argparse.RawDescriptionHelpFormatter,
-                                description='''Create filelist for DMA DIMS nearline workflow''',
-                                epilog=textwrap.dedent('''
-                            -------------------------------------------------------------------------
-
-                            Example Usage
-
-                            python dma-filelist-generation.py -i [dir with sample files], [dir with blank files] -o .
-
-                            '''))
-
-    p.add_argument('-i', dest='i', help='dir with sample files', required=True)
-    p.add_argument('-o', dest='o', help='out dir', required=True)
-    p.add_argument('-n', dest='n',)
-
-
-    args = p.parse_args()
-
-    if not os.path.exists(args.o):
-        os.makedirs(args.o)
-    print('in file', args.i)
-    print('out dir', args.o)
-    print('nm files', args.n)
-
-    msp_split(args.i, args.o, int(args.n))
-
-
-if __name__ == '__main__':
-    main()
-