Mercurial > repos > tomnl > cameradims
changeset 11:7beea19d176e draft
planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit 9c1dec94a397a8c34b10f0fe35df4159daf53ae3
author | tomnl |
---|---|
date | Tue, 19 Jun 2018 09:48:24 -0400 |
parents | 4b2ae555a5f8 |
children | b8433aedbc85 |
files | flag-remove-peaks.R msp_split.py |
diffstat | 2 files changed, 0 insertions(+), 257 deletions(-) [+] |
line wrap: on
line diff
--- a/flag-remove-peaks.R Mon May 14 08:13:47 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,184 +0,0 @@ -library(XCMSwrapper) -library(optparse) - -option_list <- list( - make_option(c("-o", "--out_dir"), type="character", default=getwd(), - help="Output folder for resulting files [default = %default]" - ), - make_option(c("-x", "--xset_path"), type="character", default=file.path(getwd(),"xset.rds"), - help="The path to the xcmsSet object [default = %default]" - ), - make_option("--polarity", default='NA', - help="polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" - ), - make_option("--rsd_i_blank", default=NA, - help="RSD threshold for the blank [default = %default]" - ), - make_option("--minfrac_blank", default=0.5, - help="minimum fraction of files for features needed for the blank [default = %default]" - ), - make_option("--rsd_rt_blank", default=NA, - help="RSD threshold for the RT of the blank [default = %default]" - ), - - make_option("--ithres_blank", default=NA, - help="Intensity threshold for the blank [default = %default]" - ), - make_option("--s2b", default=10, - help="fold change (sample/blank) needed for sample peak to be allowed. e.g. - if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. - 1000/10 = 100, so sample has fold change higher than the threshold and the peak - is not considered a blank [default = %default]" - ), - make_option("--blank_class", default='blank', type="character", - help="A string representing the class that will be used for the blank.[default = %default]" - ), - make_option("--egauss_thr", default=NA, - help="Threshold for filtering out non gaussian shaped peaks. Note this only works - if the 'verbose columns' and 'fit gauss' was used with xcms - [default = %default]" - ), - make_option("--rsd_i_sample", default=NA, - help="RSD threshold for the samples [default = %default]" - ), - make_option("--minfrac_sample", default=0.8, - help="minimum fraction of files for features needed for the samples [default = %default]" - ), - make_option("--rsd_rt_sample", default=NA, - help="RSD threshold for the RT of the samples [default = %default]" - ), - make_option("--ithres_sample", default=5000, - help="Intensity threshold for the sample [default = %default]" - ), - make_option("--grp_rm_ids", default=NA, - help="vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) - [default = %default]" - ), - make_option("--remove_spectra", action="store_true", - help=" TRUE if flagged spectra is to be removed [default = %default]" - ), - make_option("--minfrac_xcms", default=0.5, - help="minfrac for xcms grouping [default = %default]" - ), - make_option("--mzwid", default=0.001, - help="mzwid for xcms grouping [default = %default]" - ), - make_option("--bw", default=5, - help="bw for xcms grouping [default = %default]" - ), - - make_option("--temp_save", action="store_true", - help="Assign True if files for each step saved (for testing purposes) [default = %default]" - ), - - make_option("--samplelist", type="character", help="Sample list to determine the blank class"), - - make_option("--xset_name", default="xset", - help="Name of the xcmsSet object within the RData file [default = %default]" - ) - - - -) - - #make_option("--multilist", action="store_true" - # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" - #), - -# store options -opt<- parse_args(OptionParser(option_list=option_list)) - -if (is.null(opt$temp_save)){ - temp_save<-FALSE -}else{ - temp_save<-TRUE -} - -if (is.null(opt$remove_spectra)){ - remove_spectra<-FALSE -}else{ - remove_spectra<-TRUE -} - - -print(opt) - -loadRData <- function(rdata_path, xset_name){ -#loads an RData file, and returns the named xset object if it is there - load(rdata_path) - return(get(ls()[ls() == xset_name])) -} - -xset<-loadRData(opt$xset_path, opt$xset_name) -print(xset) -if (is.null(opt$samplelist)){ - blank_class <- opt$blank_class -}else{ - samplelist <- read.table(opt$samplelist, sep='\t', header=TRUE) - samplelist_blank <- unique(samplelist$sample_class[samplelist$blank=='yes']) - - chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] - if (length(chosen_blank)>1){ - print('ERROR: only 1 blank is currently allowed to be used with this tool') - exit() - } - blank_class <- as.character(chosen_blank) - print(blank_class) -} - - -if (is.null(opt$multilist)){ - ffrm_out <- XCMSwrapper::flag_remove(xset, - pol=opt$polarity, - rsd_i_blank=opt$rsd_i_blank, - minfrac_blank=opt$minfrac_blank, - rsd_rt_blank=opt$rsd_rt_blank, - ithres_blank=opt$ithres_blank, - s2b=opt$s2b, - ref.class=blank_class, - egauss_thr=opt$egauss_thr, - rsd_i_sample=opt$rsd_i_sample, - minfrac_sample=opt$minfrac_sample, - rsd_rt_sample=opt$rsd_rt_sample, - ithres_sample=opt$ithres_sample, - minfrac_xcms=opt$minfrac_xcms, - mzwid=opt$mzwid, - bw=opt$bw, - out_dir=opt$out_dir, - temp_save=temp_save, - remove_spectra=remove_spectra, - grp_rm_ids=unlist(strsplit(as.character(opt$grp_rm_ids), split=", "))[[1]]) - - xset <- ffrm_out[[1]] - grp_peaklist <- ffrm_out[[2]] - removed_peaks <- ffrm_out[[3]] - - save.image(file=file.path(opt$out_dir, 'xset_filtered.RData')) - - # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) - write.table(data.frame('grpid'=rownames(grp_peaklist), 'ID'=rownames(grp_peaklist), grp_peaklist), - file.path(opt$out_dir, 'peaklist_filtered.txt'), row.names=FALSE, sep='\t') - - removed_peaks <- data.frame(removed_peaks) - write.table(data.frame('ID'=rownames(removed_peaks),removed_peaks), - file.path(opt$out_dir, 'removed_peaks.txt'), row.names=FALSE, sep='\t') - -}else{ - - - # TODO - #xsets <- split(xset, multilist_df$multlist) - # - #mult_grps <- unique(multilist_df$multlist) - # - #for (mgrp in mult_grps){ - # xset_i <- xsets[mgrp] - # xcms::group(xset_i, - # - # } - - - -} - -
--- a/msp_split.py Mon May 14 08:13:47 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,73 +0,0 @@ -from __future__ import print_function -import argparse -import textwrap -import os -import re -import csv -import math - -def msp_split(i, o, n): - spec_total = lcount('NAME', i) - spec_lim = math.ceil(spec_total/float(n)) - spec_c = 0 - filelist = [] - header = '' - print('spec_lim', spec_lim) - with open(i, 'r') as msp_in: - for i in range(1, n+1): - with open(os.path.join(o, 'file{}.msp'.format(str(i).zfill(len(str(n))))), 'w+') as msp_out: - while spec_c <= spec_lim: - if header: - msp_out.write(header) - header = '' - line = msp_in.readline() - - if not line: - break # end of file - - if re.match('^NAME:.*$', line, re.IGNORECASE): - header = line - spec_c += 1 - else: - msp_out.write(line) - spec_c = 1 - - return filelist - -def lcount(keyword, fname): - with open(fname, 'r') as fin: - return sum([1 for line in fin if keyword in line]) - -def main(): - - p = argparse.ArgumentParser(prog='PROG', - formatter_class=argparse.RawDescriptionHelpFormatter, - description='''Create filelist for DMA DIMS nearline workflow''', - epilog=textwrap.dedent(''' - ------------------------------------------------------------------------- - - Example Usage - - python dma-filelist-generation.py -i [dir with sample files], [dir with blank files] -o . - - ''')) - - p.add_argument('-i', dest='i', help='dir with sample files', required=True) - p.add_argument('-o', dest='o', help='out dir', required=True) - p.add_argument('-n', dest='n',) - - - args = p.parse_args() - - if not os.path.exists(args.o): - os.makedirs(args.o) - print('in file', args.i) - print('out dir', args.o) - print('nm files', args.n) - - msp_split(args.i, args.o, int(args.n)) - - -if __name__ == '__main__': - main() -