Mercurial > repos > tomnl > create_msp
comparison anticipated_purity_lcms.R @ 0:4b417094bf71 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 0aa10df0ec1ed71601f932cfb11d7d4d4f620d80-dirty
author | tomnl |
---|---|
date | Wed, 02 May 2018 13:09:23 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4b417094bf71 |
---|---|
1 library(msPurity) | |
2 library(optparse) | |
3 print(sessionInfo()) | |
4 | |
# Options accepted on the command line. Options declared without a default
# are tested with is.null() further down the script.
option_list <- list(
  make_option("--xset_path", type = "character"),
  make_option(c("-o", "--out_dir"), type = "character"),
  make_option("--mzML_path", type = "character"),
  make_option("--minOffset", default = 0.5),
  make_option("--maxOffset", default = 0.5),
  make_option("--ilim", default = 0.05),
  make_option("--iwNorm", default = "none", type = "character"),
  make_option("--exclude_isotopes", action = "store_true"),
  make_option("--isotope_matrix", type = "character"),
  make_option("--purityType", default = "purityFWHMmedian"),
  make_option("--singleFile", default = 0),
  make_option("--cores", default = 4),
  make_option("--xgroups", type = "character"),
  make_option("--rdata_name", default = "xset"),
  make_option("--camera_xcms", default = "xset"),
  make_option("--files", type = "character"),
  make_option("--galaxy_files", type = "character"),
  make_option("--choose_class", type = "character"),
  make_option("--ignore_files", type = "character"),
  make_option("--rtraw_columns", action = "store_true")
)

# Parse the command line into the list `opt` used by the rest of the script.
opt <- parse_args(OptionParser(option_list = option_list))

# Log the session details and the parsed options.
print(sessionInfo())
print(opt)
33 | |
# --xgroups is a comma-separated string; convert it to a numeric vector, or
# leave it NULL when the option was not given.
if (is.null(opt$xgroups)) {
  xgroups <- NULL
} else {
  xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]])
}

print(xgroups)

# NOTE(review): a `remove_nas` branch used to sit here and filtered rows of an
# object called `df`. No --remove_nas option is declared in option_list and
# this script never creates `df` (the name would resolve to stats::df, a
# function), so the branch could never run and would have failed if it had.
# It has been removed as dead code.

# Optional tab-separated isotope matrix (with a header row); NULL when no
# --isotope_matrix path was given.
if (is.null(opt$isotope_matrix)) {
  im <- NULL
} else {
  im <- read.table(
    opt$isotope_matrix,
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE
  )
}

# The two switches below are declared as store_true flags without a default,
# so each is TRUE only when the flag appeared on the command line.
isotopes <- !is.null(opt$exclude_isotopes)
rtraw_columns <- !is.null(opt$rtraw_columns)
66 | |
# Load an RData file and return one named object from it.
#
# rdata_path: path to the RData file.
# xset_name:  name of the object to return.
#
# The file is loaded into its own environment so that objects stored in the
# file cannot overwrite this function's arguments (an RData file holding an
# object called `xset_name` or `rdata_path` would otherwise do so).
# Stops with an explicit error when the requested object is not in the file.
loadRData <- function(rdata_path, xset_name) {
  loaded <- new.env(parent = emptyenv())
  load(rdata_path, envir = loaded)

  # Log what the file contained.
  print(ls(loaded))

  if (!exists(xset_name, envir = loaded, inherits = FALSE)) {
    stop(
      "Object '", xset_name, "' not found in RData file '", rdata_path, "'",
      call. = FALSE
    )
  }

  get(xset_name, envir = loaded, inherits = FALSE)
}
73 | |
# Fetch the object named by --rdata_name from the RData file at --xset_path.
target_obj <- loadRData(opt$xset_path, opt$rdata_name)

# When --camera_xcms is 'camera' the xcmsSet slot of the loaded object is
# used; for any other value the loaded object is used directly.
xset <- if (opt$camera_xcms == "camera") target_obj@xcmsSet else target_obj

print(xset)
83 | |
# Isolation window offsets as numbers (the CLI may deliver them as strings).
minOffset <- as.numeric(opt$minOffset)
maxOffset <- as.numeric(opt$maxOffset)

# Choose the isolation-window normalisation function from --iwNorm.
# 'none' disables normalisation. Any value outside the four supported ones is
# rejected here; previously it left iwNorm / iwNormFun undefined and the script
# failed later with an "object not found" error.
iwNormFun <- switch(
  opt$iwNorm,
  none = NULL,
  gauss = msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset),
  rcosine = msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset),
  QE5 = msPurity::iwNormQE.5(),
  stop(
    "Unsupported --iwNorm value '", opt$iwNorm,
    "'; expected one of: none, gauss, rcosine, QE5",
    call. = FALSE
  )
)
iwNorm <- !identical(opt$iwNorm, "none")
101 | |
print(xset@filepaths)

# Optionally repoint xset@filepaths at new locations.
#   --files        comma-separated file paths/names, matched to the files
#                  already in xset by basename
#   --galaxy_files comma-separated replacement paths, assumed to be in the
#                  same order as --files (TODO confirm against the caller);
#                  when absent, the --files entries are the replacements
if (!is.null(opt$files)) {
  updated_filepaths <- trimws(strsplit(opt$files, ",")[[1]])
  updated_filepaths <- updated_filepaths[updated_filepaths != ""]
  print(updated_filepaths)
  updated_filenames <- basename(updated_filepaths)
  original_filenames <- basename(xset@filepaths)

  # For every file already in xset, find its position in the supplied list.
  # The lookup has to go in this direction: the result is used to index the
  # supplied vectors and is stored in xset's own file order. Matching the
  # other way round assigns paths to the wrong samples whenever the supplied
  # order differs from xset's order.
  update_idx <- match(original_filenames, updated_filenames)

  if (!is.null(opt$galaxy_files)) {
    galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]])
    galaxy_files <- galaxy_files[galaxy_files != ""]
    if (length(galaxy_files) != length(updated_filepaths)) {
      stop(
        "--galaxy_files and --files must list the same number of files",
        call. = FALSE
      )
    }
    new_filepaths <- galaxy_files[update_idx]
  } else {
    new_filepaths <- updated_filepaths[update_idx]
  }

  # Files of xset that were not listed keep their current path instead of
  # being replaced by NA or shifting the remaining entries.
  unmatched <- is.na(new_filepaths)
  if (any(unmatched)) {
    warning(
      "No replacement path supplied for: ",
      paste(original_filenames[unmatched], collapse = ", "),
      call. = FALSE
    )
  }
  xset@filepaths[!unmatched] <- new_filepaths[!unmatched]
}
122 | |
# Files whose class is NOT among --choose_class are marked to be ignored.
# NA is the sentinel for "no class filter requested".
if (!is.null(opt$choose_class)) {
  classes <- trimws(strsplit(opt$choose_class, ",")[[1]])

  ignore_files_class <- which(!as.character(xset@phenoData$class) %in% classes)

  print('choose class')
  print(ignore_files_class)
} else {
  ignore_files_class <- NA
}

# Combine the class-based indices with files named explicitly through
# --ignore_files (matched against the row names of xset@phenoData).
if (!is.null(opt$ignore_files)) {
  ignore_files_string <- trimws(strsplit(opt$ignore_files, ",")[[1]])
  filenames <- rownames(xset@phenoData)
  ignore_files <- which(filenames %in% ignore_files_string)

  ignore_files <- unique(c(ignore_files, ignore_files_class))
} else {
  ignore_files <- ignore_files_class
}

# Drop the NA sentinel. The previous `!= ""` filter compared integer indices
# with an empty string and kept NA, so NA was handed on as a file index
# whenever --ignore_files was used without --choose_class.
ignore_files <- ignore_files[!is.na(ignore_files)]

# "Nothing to ignore" is represented as NULL, as in the no-option case,
# rather than as a zero-length index vector.
if (length(ignore_files) == 0) {
  ignore_files <- NULL
}

print('ignore_files')
print(ignore_files)
153 | |
154 | |
# Run msPurity::purityX on the prepared xset with the settings gathered above.
ppLCMS <- msPurity::purityX(
  xset = xset,
  xgroups = xgroups,
  offsets = c(minOffset, maxOffset),
  purityType = opt$purityType,
  ilim = opt$ilim,
  isotopes = isotopes,
  im = im,
  iwNorm = iwNorm,
  iwNormFun = iwNormFun,
  singleFile = opt$singleFile,
  fileignore = ignore_files,
  rtrawColumns = rtraw_columns,
  cores = opt$cores
)

dfp <- ppLCMS@predictions

# Rename columns to make the table compatible with deconrank. The lookup
# table lives inside local() so no extra object ends up in the workspace
# image saved at the end of the script.
colnames(dfp) <- local({
  renames <- c(
    grpid = "peakID",
    median = "medianPurity",
    mean = "meanPurity",
    sd = "sdPurity",
    stde = "sdePurity",
    RSD = "cvPurity",
    pknm = "pknmPurity"
  )
  current <- colnames(dfp)
  hit <- current %in% names(renames)
  current[hit] <- unname(renames[current[hit]])
  current
})

# Missing median purities are written out as 0.
if (anyNA(dfp$medianPurity)) {
  dfp$medianPurity[is.na(dfp$medianPurity)] <- 0
}

print('saving tsv')
print(head(dfp))
write.table(
  dfp,
  file.path(opt$out_dir, 'anticipated_purity_lcms.tsv'),
  row.names = FALSE,
  sep = '\t'
)

print('saving RData')
save.image(file.path(opt$out_dir, 'anticipated_purity_lcms.RData'))