Mercurial > repos > tomnl > create_sqlite_db
comparison anticipated_purity_dims.R @ 0:fe7d7cc95ca5 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2e847122cf605951c334858455fc1d3ebdb189e9-dirty
author | tomnl |
---|---|
date | Tue, 27 Mar 2018 06:03:50 -0400 |
parents | |
children | 1a88758357ed |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fe7d7cc95ca5 |
---|---|
# Command-line wrapper around msPurity::dimsPredictPuritySingle().
#
# Reads a tab-separated peak table (either a plain table with an `mz` column
# or, with --dimspy, a dimspy-style peak matrix), runs
# msPurity::dimsPredictPuritySingle() on the mz values against one mzML file
# and writes the peak table with the prediction columns appended to
# <out_dir>/anticipated_dims_purity.tsv.
#
# Required options: --mzML_file (a file, or a directory when --dimspy is
# used), --peaks_file, --out_dir.
library(msPurity)
library(optparse)

option_list <- list(
  make_option(c("--mzML_file"), type = "character"),
  make_option(c("--peaks_file"), type = "character"),
  make_option(c("-o", "--out_dir"), type = "character"),
  make_option("--minOffset", default = 0.5),
  make_option("--maxOffset", default = 0.5),
  make_option("--ilim", default = 0.05),
  make_option("--ppm", default = 4),
  make_option("--dimspy", action = "store_true"),
  make_option("--sim", action = "store_true"),
  make_option("--remove_nas", action = "store_true"),
  make_option("--iwNorm", default = "none", type = "character"),
  make_option("--dimspy_file_num", default = 1),
  make_option("--exclude_isotopes", action = "store_true"),
  make_option("--isotope_matrix", type = "character")
)

# store options
opt <- parse_args(OptionParser(option_list = option_list))

print(sessionInfo())
print(opt)

# Fail early, with a readable message, when a mandatory option is missing
# (otherwise the script dies later inside read.table()/dir.exists()).
for (required_opt in c("mzML_file", "peaks_file", "out_dir")) {
  if (is.null(opt[[required_opt]])) {
    stop("Missing required option --", required_opt, call. = FALSE)
  }
}

# Flags without a default are absent from `opt` when not supplied. They must
# be looked up with `[[` (exact match): `opt$dimspy` would partial-match
# `dimspy_file_num` and therefore always look "set".
use_dimspy <- isTRUE(opt[["dimspy"]])
remove_nas <- isTRUE(opt[["remove_nas"]])
isotopes <- isTRUE(opt[["exclude_isotopes"]])
sim <- isTRUE(opt[["sim"]])

if (use_dimspy) {
  indf <- read.table(opt$peaks_file,
                     header = TRUE, sep = "\t", stringsAsFactors = FALSE)

  # Per-file intensity columns are taken to start at column 8.
  if (ncol(indf) < 8) {
    stop("The dimspy peak matrix has fewer than 8 columns, so no per-file ",
         "column can be selected", call. = FALSE)
  }
  filename <- colnames(indf)[8:ncol(indf)][opt$dimspy_file_num]
  if (length(filename) != 1 || is.na(filename)) {
    stop("--dimspy_file_num (", opt$dimspy_file_num, ") does not match a ",
         "file column in the peak matrix", call. = FALSE)
  }
  # NOTE(review): read.table() sanitises header names (check.names = TRUE),
  # so a file name containing e.g. '-' will no longer match the file on disk.

  # Keep the raw values of the chosen file column as `i`, then make the
  # original column numeric.
  indf$i <- indf[[filename]]
  indf[[filename]] <- as.numeric(indf[[filename]])

  # The data file may be listed as RAW, but only mzML can be used currently,
  # so an mzML file of the same name is expected in the same folder. Only the
  # extension is replaced, not a "raw" occurring elsewhere in the name.
  filename <- sub("\\.raw$", ".mzML", filename, ignore.case = TRUE)

  # Skip the first three rows (presumably dimspy metadata rows - confirm).
  # Negative indexing stays correct when the table has fewer than 4 rows.
  df <- indf[-seq_len(3), , drop = FALSE]
  if ("blank_flag" %in% colnames(df)) {
    # which() drops rows whose flag is NA instead of emitting all-NA rows
    df <- df[which(df$blank_flag == 1), ]
  }

  colnames(df)[colnames(df) == "m.z"] <- "mz"

  # "nan" strings become NA before the numeric conversion
  nan_rows <- df$mz %in% "nan"
  if (any(nan_rows)) {
    df$mz[nan_rows] <- NA
  }
  df$mz <- as.numeric(df$mz)
} else {
  df <- read.table(opt$peaks_file, header = TRUE, sep = "\t")
  filename <- NA
}

if (remove_nas) {
  df <- df[!is.na(df$mz), ]
}

if (is.null(opt[["isotope_matrix"]])) {
  im <- NULL
} else {
  im <- read.table(opt[["isotope_matrix"]],
                   header = TRUE, sep = "\t", stringsAsFactors = FALSE)
}

print("FIRST ROWS OF PEAK FILE")
print(head(df))

if (dir.exists(opt$mzML_file)) {
  # if directory then we need to add a file name
  print(filename)
  if (is.na(filename)) {
    # stop() (rather than print() + quit()) so the process exits non-zero
    stop("If a directory is provided then a filename needs to be entered ",
         "directly or automatically obtained by using a dimspy output",
         call. = FALSE)
  }
  mzml_file <- file.path(opt$mzML_file, filename)
} else {
  mzml_file <- opt$mzML_file
}

if (!file.exists(mzml_file)) {
  stop("mzML file not found: ", mzml_file, call. = FALSE)
}

minOffset <- as.numeric(opt$minOffset)
maxOffset <- as.numeric(opt$maxOffset)

# Isolation-window normalisation: "none" disables it, the other values pick
# one of the msPurity normalisation function generators.
if (opt$iwNorm == "none") {
  iwNorm <- FALSE
  iwNormFun <- NULL
} else if (opt$iwNorm == "gauss") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
} else if (opt$iwNorm == "rcosine") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset,
                                       maxOff = maxOffset)
} else if (opt$iwNorm == "QE5") {
  iwNorm <- TRUE
  iwNormFun <- msPurity::iwNormQE.5()
} else {
  stop("Unknown --iwNorm value '", opt$iwNorm,
       "' (expected one of: none, gauss, rcosine, QE5)", call. = FALSE)
}

predicted <- msPurity::dimsPredictPuritySingle(df$mz,
                                               filepth = mzml_file,
                                               minOffset = minOffset,
                                               maxOffset = maxOffset,
                                               ppm = opt$ppm,
                                               mzML = TRUE,
                                               sim = sim,
                                               ilim = opt$ilim,
                                               isotopes = isotopes,
                                               im = im,
                                               iwNorm = iwNorm,
                                               iwNormFun = iwNormFun)
predicted <- cbind(df, predicted)

write.table(predicted,
            file.path(opt$out_dir, "anticipated_dims_purity.tsv"),
            row.names = FALSE, sep = "\t")