comparison anticipated_purity_dims.R @ 0:fe7d7cc95ca5 draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2e847122cf605951c334858455fc1d3ebdb189e9-dirty
author tomnl
date Tue, 27 Mar 2018 06:03:50 -0400
parents
children 1a88758357ed
comparison
equal deleted inserted replaced
-1:000000000000 0:fe7d7cc95ca5
# Packages: msPurity provides the purity prediction, optparse the CLI parsing.
library(msPurity)
library(optparse)

# Command-line interface of the script.
# The flags declared with action = "store_true" are given no default here;
# the rest of the script checks them on the parsed list.
option_list <- list(
  make_option(c("--mzML_file"), type = "character"),
  make_option(c("--peaks_file"), type = "character"),
  make_option(c("-o", "--out_dir"), type = "character"),
  make_option("--minOffset", default = 0.5),
  make_option("--maxOffset", default = 0.5),
  make_option("--ilim", default = 0.05),
  make_option("--ppm", default = 4),
  make_option("--dimspy", action = "store_true"),
  make_option("--sim", action = "store_true"),
  make_option("--remove_nas", action = "store_true"),
  make_option("--iwNorm", default = "none", type = "character"),
  make_option("--dimspy_file_num", default = 1),
  make_option("--exclude_isotopes", action = "store_true"),
  make_option("--isotope_matrix", type = "character")
)

# Parse the command line into the `opt` list used by everything below.
parser <- OptionParser(option_list = option_list)
opt <- parse_args(parser)

# Log the R session and the effective options to stdout.
print(sessionInfo())
print(opt)

# Load the peak table into `df` and work out which data file it refers to.
#
# With --dimspy the peaks file is read as a tab-separated table in which the
# columns from the 8th onwards are named after data files; the one selected by
# --dimspy_file_num supplies `filename`. Without --dimspy the peaks file is
# read as-is and `filename` is NA.
#
# isTRUE() is used because --dimspy has no default: when the flag is not
# given, opt$dimspy is NULL and a bare `if (opt$dimspy)` aborts with
# "argument is of length zero".
if (isTRUE(opt$dimspy)) {
  indf <- read.table(opt$peaks_file,
                     header = TRUE, sep = "\t", stringsAsFactors = FALSE)

  if (ncol(indf) < 8) {
    stop("dimspy peak file must have at least 8 columns, found ",
         ncol(indf), call. = FALSE)
  }
  sample_cols <- colnames(indf)[8:ncol(indf)]
  filename <- sample_cols[opt$dimspy_file_num]
  if (is.na(filename)) {
    stop("--dimspy_file_num (", opt$dimspy_file_num,
         ") does not select one of the ", length(sample_cols),
         " file columns", call. = FALSE)
  }

  # Keep the raw values of the selected column in `i`, then make the selected
  # column itself numeric.
  indf$i <- indf[[filename]]
  indf[[filename]] <- as.numeric(indf[[filename]])

  # Check if the data file is mzML or RAW (can only use mzML currently), so
  # an mzML file of the same name is expected in the same folder.
  filename <- sub("raw", "mzML", filename, ignore.case = TRUE)

  # Drop the first three rows (presumably dimspy per-file metadata rows;
  # TODO confirm). Negative indexing stays correct for tables with fewer
  # than four rows, where 4:nrow() would count backwards.
  df <- indf[-seq_len(3), , drop = FALSE]
  if ("blank_flag" %in% colnames(df)) {
    df <- df[df$blank_flag == 1, , drop = FALSE]
  }

  colnames(df)[colnames(df) == "m.z"] <- "mz"

  # Turn textual 'nan' entries into NA before the numeric conversion.
  # %in% never yields NA, so this also works when mz already contains NA
  # (indexing a data frame assignment with an NA mask is an error).
  if ("nan" %in% df$mz) {
    df$mz[df$mz %in% "nan"] <- NA
  }
  df$mz <- as.numeric(df$mz)
} else {
  df <- read.table(opt$peaks_file, header = TRUE, sep = "\t")
  filename <- NA
}

# --remove_nas: drop peaks without an m/z value. drop = FALSE keeps `df` a
# data frame even when the peak table has a single column.
if (!is.null(opt$remove_nas)) {
  df <- df[!is.na(df$mz), , drop = FALSE]
}

# Optional isotope matrix: read from the tab-separated file given with
# --isotope_matrix, otherwise left as NULL.
im <- NULL
if (!is.null(opt$isotope_matrix)) {
  im <- read.table(opt$isotope_matrix,
                   header = TRUE, sep = "\t", stringsAsFactors = FALSE)
}

# `isotopes` is TRUE exactly when --exclude_isotopes was supplied.
isotopes <- !is.null(opt$exclude_isotopes)

# Show the start of the peak table in the log.
print("FIRST ROWS OF PEAK FILE")
print(head(df))

# Resolve the mzML file to analyse.
# --mzML_file may be a single file or a directory. For a directory the file
# name has to come from `filename`, which is only set when the peaks were read
# in dimspy mode.
if (is.null(opt$mzML_file)) {
  stop("--mzML_file must be provided (an mzML file or a directory)",
       call. = FALSE)
}

if (dir.exists(opt$mzML_file)) {
  # If a directory is given we need to add a file name.
  print(filename)
  if (is.na(filename)) {
    # stop() rather than print() + quit(): quit() exits with status 0, which
    # reports the failed run as a success to the calling process.
    stop("If --mzML_file is a directory, the mzML file name must be ",
         "obtained automatically from a dimspy output (use --dimspy)",
         call. = FALSE)
  }
  mzml_file <- file.path(opt$mzML_file, filename)
} else {
  mzml_file <- opt$mzML_file
}

# `sim` is TRUE exactly when --sim was supplied.
sim <- !is.null(opt$sim)

# Isolation window offsets as numbers.
minOffset <- as.numeric(opt$minOffset)
maxOffset <- as.numeric(opt$maxOffset)

# Isolation-window normalisation selected with --iwNorm.
# "none" disables normalisation; the other values build the matching msPurity
# normalisation function. An unrecognised value is rejected here, instead of
# leaving `iwNorm`/`iwNormFun` undefined and failing later with
# "object not found".
iwNorm <- !identical(opt$iwNorm, "none")
iwNormFun <- switch(
  opt$iwNorm,
  none = NULL,
  gauss = msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset),
  rcosine = msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset),
  QE5 = msPurity::iwNormQE.5(),
  stop("Unknown --iwNorm value '", opt$iwNorm,
       "'; expected one of: none, gauss, rcosine, QE5", call. = FALSE)
)

# Run msPurity's dimsPredictPuritySingle on the peak m/z values, using the
# resolved mzML file and the settings prepared above.
predicted <- msPurity::dimsPredictPuritySingle(
  df$mz,
  filepth = mzml_file,
  minOffset = minOffset,
  maxOffset = maxOffset,
  ppm = opt$ppm,
  mzML = TRUE,
  sim = sim,
  ilim = opt$ilim,
  isotopes = isotopes,
  im = im,
  iwNorm = iwNorm,
  iwNormFun = iwNormFun
)

# Put the prediction columns next to the original peak table.
predicted <- cbind(df, predicted)

# Write the combined table as a tab-separated file in the output directory.
out_file <- file.path(opt$out_dir, "anticipated_dims_purity.tsv")
write.table(predicted, out_file, row.names = FALSE, sep = "\t")
123 ilim = opt$ilim,
124 isotopes = isotopes,
125 im = im,
126 iwNorm = iwNorm,
127 iwNormFun = iwNormFun
128 )
129 predicted <- cbind(df, predicted)
130
131
132 write.table(predicted, file.path(opt$out_dir, 'anticipated_dims_purity.tsv'), row.names=FALSE, sep='\t')