comparison dimsPredictPuritySingle.R @ 0:1251fc5c058e draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2948ce35fa7fffe5a64711cb30be971031e79019-dirty
author tomnl
date Fri, 24 May 2019 09:11:29 -0400
parents
children 8398f2a364ee
comparison
equal deleted inserted replaced
-1:000000000000 0:1251fc5c058e
# Command-line wrapper around msPurity::dimsPredictPuritySingle().
#
# Reads a tab-separated peak list (either a plain table with an `mz` column or,
# with --dimspy, a dimspy peak matrix), predicts precursor purity for every m/z
# value against a single mzML file and writes the peak table with the
# prediction columns appended to
# <out_dir>/dimsPredictPuritySingle_output.tsv.

library(msPurity)
library(optparse)
print(sessionInfo())

# Command-line options ----
# The flag options (--dimspy, --sim, --remove_nas, --exclude_isotopes) declare
# no default; the script treats them as "off" unless they were supplied.
option_list <- list(
  make_option(c("--mzML_file"), type = "character"),
  make_option(c("--peaks_file"), type = "character"),
  make_option(c("-o", "--out_dir"), type = "character"),
  make_option("--minOffset", default = 0.5),
  make_option("--maxOffset", default = 0.5),
  make_option("--ilim", default = 0.05),
  make_option("--ppm", default = 4),
  make_option("--dimspy", action = "store_true"),
  make_option("--sim", action = "store_true"),
  make_option("--remove_nas", action = "store_true"),
  make_option("--iwNorm", default = "none", type = "character"),
  make_option("--file_num_dimspy", default = 1),
  make_option("--exclude_isotopes", action = "store_true"),
  make_option("--isotope_matrix", type = "character")
)

opt <- parse_args(OptionParser(option_list = option_list))
print(opt)

# Fail early, with a readable message, when a mandatory path is missing rather
# than letting read.table()/file.path() fail later on a NULL value.
required_opts <- c("mzML_file", "peaks_file", "out_dir")
absent_opts <- required_opts[
  vapply(required_opts, function(nm) is.null(opt[[nm]]), logical(1))
]
if (length(absent_opts) > 0) {
  stop(
    "Missing required option(s): ",
    paste0("--", absent_opts, collapse = ", "),
    call. = FALSE
  )
}

# Stop with a clear message when the peak table has no m/z column.
require_mz_column <- function(tbl) {
  if (!("mz" %in% colnames(tbl))) {
    stop("Peak file does not contain an 'mz' column", call. = FALSE)
  }
  invisible(tbl)
}

# Load the peak table and resolve the mzML path ----
if (!isTRUE(opt$dimspy)) {
  # Plain peak list: used as-is, --mzML_file is the data file itself.
  df <- read.table(opt$peaks_file, header = TRUE, sep = "\t")
  mzml_file <- opt$mzML_file
  require_mz_column(df)
} else {
  # NOTE(review): read.table() is called with its default check.names, which
  # is presumably what turns an "m/z" header into the "m.z" name handled
  # below; it would likewise alter sample column names that are not valid R
  # names - confirm derived file names still match the files on disk.
  indf <- read.table(opt$peaks_file,
    header = TRUE, sep = "\t", stringsAsFactors = FALSE
  )

  # file.exists() is also TRUE for directories, so explicitly exclude them:
  # a directory means the file name must be taken from the peak matrix.
  if (file.exists(opt$mzML_file) && !dir.exists(opt$mzML_file)) {
    mzml_file <- opt$mzML_file
  } else {
    # Sample columns start at column 8 of the peak matrix.
    if (ncol(indf) < 8) {
      stop("dimspy peak matrix has no sample columns (expected from column 8)",
        call. = FALSE
      )
    }
    filename <- colnames(indf)[-seq_len(7)][opt$file_num_dimspy]
    if (is.na(filename)) {
      stop("--file_num_dimspy does not select a sample column of the peak matrix",
        call. = FALSE
      )
    }
    print(filename)

    # Keep the untouched column as `i`, then make the sample column numeric.
    indf$i <- indf[[filename]]
    indf[[filename]] <- as.numeric(indf[[filename]])

    # The data file may be listed as RAW, but only mzML can be read, so an
    # mzML file of the same name is expected in the given folder. Only the
    # extension is swapped; "raw" elsewhere in the name is left alone.
    filename <- sub("\\.raw$", ".mzML", filename, ignore.case = TRUE)
    print(filename)

    mzml_file <- file.path(opt$mzML_file, filename)
  }

  # Skip the first three rows of the matrix; negative indexing stays correct
  # when the table has fewer than four rows (4:nrow() would count backwards).
  df <- indf[-seq_len(3), , drop = FALSE]
  if ("blank_flag" %in% colnames(df)) {
    # which() drops rows whose flag is NA instead of emitting all-NA rows.
    df <- df[which(df$blank_flag == 1), , drop = FALSE]
  }
  colnames(df)[colnames(df) == "m.z"] <- "mz"
  require_mz_column(df)

  # Vector-level replacement: safe even when mz already contains NA values.
  df$mz[df$mz %in% "nan"] <- NA
  df$mz <- as.numeric(df$mz)
}

if (isTRUE(opt$remove_nas)) {
  df <- df[!is.na(df$mz), ]
}

# Translate options into dimsPredictPuritySingle() arguments ----
if (is.null(opt$isotope_matrix)) {
  im <- NULL
} else {
  im <- read.table(opt$isotope_matrix,
    header = TRUE, sep = "\t", stringsAsFactors = FALSE
  )
}

isotopes <- isTRUE(opt$exclude_isotopes)
sim <- isTRUE(opt$sim)

minOffset <- as.numeric(opt$minOffset)
maxOffset <- as.numeric(opt$maxOffset)

# Isolation-window normalisation: "none" disables it; any unrecognised value
# is rejected here instead of leaving iwNorm/iwNormFun undefined.
iwNorm <- !identical(opt$iwNorm, "none")
iwNormFun <- switch(opt$iwNorm,
  none = NULL,
  gauss = msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset),
  rcosine = msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset),
  QE5 = msPurity::iwNormQE.5(),
  stop(
    "Unknown --iwNorm value '", opt$iwNorm,
    "' (expected one of: none, gauss, rcosine, QE5)",
    call. = FALSE
  )
)

# Run the prediction and write the result ----
print("FIRST ROWS OF PEAK FILE")
print(head(df))
print(mzml_file)
predicted <- msPurity::dimsPredictPuritySingle(df$mz,
  filepth = mzml_file,
  minOffset = minOffset,
  maxOffset = maxOffset,
  ppm = opt$ppm,
  mzML = TRUE,
  sim = sim,
  ilim = opt$ilim,
  isotopes = isotopes,
  im = im,
  iwNorm = iwNorm,
  iwNormFun = iwNormFun
)
predicted <- cbind(df, predicted)

print(head(predicted))
out_file <- file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv")
print(out_file)
write.table(predicted, out_file, row.names = FALSE, sep = "\t")