comparison msi_preprocessing.xml @ 5:755d77066d4b draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 37da74ed68228b16efbdbde776e7c38cc06eb5d5
author galaxyp
date Tue, 19 Jun 2018 18:05:34 -0400
parents ada9dee67b5d
children 82a0eba2e3af
comparison
equal deleted inserted replaced
4:ada9dee67b5d 5:755d77066d4b
1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.1"> 1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.2">
2 <description> 2 <description>
3 mass spectrometry imaging preprocessing 3 mass spectrometry imaging preprocessing
4 </description> 4 </description>
5 <requirements> 5 <requirements>
6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> 6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement>
7 <requirement type="package" version="2.2.1">r-gridextra</requirement> 7 <requirement type="package" version="2.2.1">r-gridextra</requirement>
8 <requirement type="package" version="0.20-35">r-lattice</requirement> 8 <requirement type="package" version="0.20-35">r-lattice</requirement>
9 <requirement type="package" version="3.34.9">bioconductor-limma</requirement> 9 <!--requirement type="package" version="3.34.9">bioconductor-limma</requirement-->
10 </requirements> 10 </requirements>
11 <command detect_errors="exit_code"> 11 <command detect_errors="exit_code">
12 <![CDATA[ 12 <![CDATA[
13 13
14 #if $infile.ext == 'imzml' 14 #if $infile.ext == 'imzml'
32 ################################# load libraries and read file ################# 32 ################################# load libraries and read file #################
33 33
34 library(Cardinal) 34 library(Cardinal)
35 library(gridExtra) 35 library(gridExtra)
36 library(lattice) 36 library(lattice)
37 library(limma) 37 ###library(limma)
38 38
39 #if $infile.ext == 'imzml' 39 #if $infile.ext == 'imzml'
40 msidata = readImzML('infile') 40 msidata <- readImzML('infile', mass.accuracy=$accuracy, units.accuracy = "$units")
41 #elif $infile.ext == 'analyze75' 41 #elif $infile.ext == 'analyze75'
42 msidata = readAnalyze('infile') 42 msidata = readAnalyze('infile')
43 #else 43 #else
44 load('infile.RData') 44 load('infile.RData')
45 #end if 45 #end if
50 #loads an RData file, and returns it 50 #loads an RData file, and returns it
51 load(fileName) 51 load(fileName)
52 get(ls()[ls() != "fileName"]) 52 get(ls()[ls() != "fileName"])
53 } 53 }
54 54
55 ######################### preparations for optional QC report ################# 55 ######################### preparations for QC report #################
56
57 #if $outputs.outputs_select == "quality_control":
58
59 ### values for QC table:
60 56
61 maxfeatures = length(features(msidata)) 57 maxfeatures = length(features(msidata))
62 medianpeaks = median(colSums(spectra(msidata)[]>0)) 58 medianpeaks = median(colSums(spectra(msidata)[]>0))
63 medint = round(median(spectra(msidata)[]), digits=2) 59 medint = round(median(spectra(msidata)[]), digits=2)
64 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 60 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
65 QC_numbers= data.frame(rawdata = c(maxfeatures, medianpeaks, medint, TICs)) 61 QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs))
66 vectorofactions = "rawdata" 62 vectorofactions = "inputdata"
67
68 ### Read tabular file with calibrant m/z:
69
70 calibrant_list = read.delim("$outputs.calibrant_file", header = FALSE, stringsAsFactors = FALSE)
71
72 ### calculate how many input calibrant m/z are valid:
73
74 inputcalibrants = calibrant_list[calibrant_list[,$outputs.calibrants_column]>min(mz(msidata)) & calibrant_list[,$outputs.calibrants_column]<max(mz(msidata)),$outputs.calibrants_column]
75 number_calibrants_in = length(calibrant_list[,$outputs.calibrants_column])
76 number_calibrants_valid = length(inputcalibrants)
77
78 ### Quality control report
79
80 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
81 plot(0,type='n',axes=FALSE,ann=FALSE)
82 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))
83 title(main=paste0("\n\n\n\n Number valid m/z in ", "$outputs.calibrant_file.display_name",": ", number_calibrants_valid, "/", number_calibrants_in))
84
85 for (calibrant in inputcalibrants)
86 {currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
87 par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
88 scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="raw")
89 assign(paste("rawdata",calibrant, sep="_"), currentimage)}
90
91 current_plot_raw = vector(length(inputcalibrants), mode='list')
92
93 #end if
94 63
95 ############################### Preprocessing steps ########################### 64 ############################### Preprocessing steps ###########################
96 ############################################################################### 65 ###############################################################################
97 66
98 #for $method in $methods: 67 #for $method in $methods:
103 print('Normalization') 72 print('Normalization')
104 ##normalization 73 ##normalization
105 74
106 msidata = normalize(msidata, method="tic") 75 msidata = normalize(msidata, method="tic")
107 76
108 ############################### optional QC ########################### 77 ############################### QC ###########################
109 78
110 #if $outputs.outputs_select == "quality_control":
111
112 ### values for QC table:
113 maxfeatures = length(features(msidata)) 79 maxfeatures = length(features(msidata))
114 medianpeaks = median(colSums(spectra(msidata)[]>0)) 80 medianpeaks = median(colSums(spectra(msidata)[]>0))
115 medint = round(median(spectra(msidata)[]), digits=2) 81 medint = round(median(spectra(msidata)[]), digits=2)
116 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 82 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
117 normalized = c(maxfeatures, medianpeaks, medint, TICs) 83 normalized = c(maxfeatures, medianpeaks, medint, TICs)
118 QC_numbers= cbind(QC_numbers, normalized) 84 QC_numbers= cbind(QC_numbers, normalized)
119
120 ### preparation for QC plots
121 vectorofactions = append(vectorofactions, "normalized") 85 vectorofactions = append(vectorofactions, "normalized")
122 for (calibrant in inputcalibrants)
123 {currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
124 par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
125 scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="normalized")
126 assign(paste("normalized",calibrant, sep="_"), currentimage)}
127
128 #end if
129 86
130 ############################### Baseline reduction ########################### 87 ############################### Baseline reduction ###########################
131 88
132 #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction': 89 #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction':
133 print('Baseline_reduction') 90 print('Baseline_reduction')
134 ##baseline reduction 91 ##baseline reduction
135 92
136 msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline) 93 msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline)
137 94
138 ############################### optional QC ########################### 95 ############################### QC ###########################
139 96
140 #if $outputs.outputs_select == "quality_control":
141
142 ### values for QC table:
143 maxfeatures = length(features(msidata)) 97 maxfeatures = length(features(msidata))
144 medianpeaks = median(colSums(spectra(msidata)[]>0)) 98 medianpeaks = median(colSums(spectra(msidata)[]>0))
145 medint = round(median(spectra(msidata)[]), digits=2) 99 medint = round(median(spectra(msidata)[]), digits=2)
146 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 100 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
147 baseline= c(maxfeatures, medianpeaks, medint, TICs) 101 baseline= c(maxfeatures, medianpeaks, medint, TICs)
148 QC_numbers= cbind(QC_numbers, baseline) 102 QC_numbers= cbind(QC_numbers, baseline)
149 103 vectorofactions = append(vectorofactions, "baseline red.")
150 ### preparation for QC plots
151 vectorofactions = append(vectorofactions, "baseline_rem")
152
153 for (calibrant in inputcalibrants)
154 {currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
155 par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
156 scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="baseline removed")
157 assign(paste("baseline_rem",calibrant, sep="_"), currentimage)}
158
159 #end if
160 104
161 ############################### Smoothing ########################### 105 ############################### Smoothing ###########################
162 106
163 #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing': 107 #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing':
164 print('Smoothing') 108 print('Smoothing')
178 122
179 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) 123 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
180 124
181 #end if 125 #end if
182 126
183 ############################### optional QC ########################### 127 ############################### QC ###########################
184 128
185 #if $outputs.outputs_select == "quality_control":
186
187 ### values for QC table:
188 maxfeatures = length(features(msidata)) 129 maxfeatures = length(features(msidata))
189 medianpeaks = median(colSums(spectra(msidata)[]>0)) 130 medianpeaks = median(colSums(spectra(msidata)[]>0))
190 medint = round(median(spectra(msidata)[]), digits=2) 131 medint = round(median(spectra(msidata)[]), digits=2)
191 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 132 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
192 smoothed= c(maxfeatures, medianpeaks, medint, TICs) 133 smoothed= c(maxfeatures, medianpeaks, medint, TICs)
193 QC_numbers= cbind(QC_numbers, smoothed) 134 QC_numbers= cbind(QC_numbers, smoothed)
194
195 ### preparation for QC plots
196 vectorofactions = append(vectorofactions, "smoothed") 135 vectorofactions = append(vectorofactions, "smoothed")
197
198 for (calibrant in inputcalibrants)
199 {currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
200 par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
201 scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="smoothed")
202 assign(paste("smoothed",calibrant, sep="_"), currentimage)}
203
204 #end if
205 136
206 ############################### Peak picking ########################### 137 ############################### Peak picking ###########################
207 138
208 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking': 139 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking':
209 print('Peak_picking') 140 print('Peak_picking')
224 155
225 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method) 156 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method)
226 157
227 #end if 158 #end if
228 159
229 ############################### optional QC ########################### 160 ############################### QC ###########################
230 161
231 #if $outputs.outputs_select == "quality_control":
232
233 ### values for QC table:
234 maxfeatures = length(features(msidata)) 162 maxfeatures = length(features(msidata))
235 medianpeaks = median(colSums(spectra(msidata)[]>0)) 163 medianpeaks = median(colSums(spectra(msidata)[]>0))
236 medint = round(median(spectra(msidata)[]), digits=2) 164 medint = round(median(spectra(msidata)[]), digits=2)
237 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 165 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
238 picked= c(maxfeatures, medianpeaks, medint, TICs) 166 picked= c(maxfeatures, medianpeaks, medint, TICs)
239 QC_numbers= cbind(QC_numbers, picked) 167 QC_numbers= cbind(QC_numbers, picked)
240
241 ### preparation for QC plots
242 vectorofactions = append(vectorofactions, "picked") 168 vectorofactions = append(vectorofactions, "picked")
243
244 for (calibrant in inputcalibrants)
245 {currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
246 par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
247 scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="picked")
248 assign(paste("picked",calibrant, sep="_"), currentimage)}
249
250 #end if
251 169
252 ############################### Peak alignment ########################### 170 ############################### Peak alignment ###########################
253 171
254 #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment': 172 #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment':
255 print('Peak_alignment') 173 print('Peak_alignment')
283 201
284 msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment, ref=align_peak_reference) 202 msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment, ref=align_peak_reference)
285 203
286 #end if 204 #end if
287 205
288 ############################### optional QC ########################### 206 ############################### QC ###########################
289 #if $outputs.outputs_select == "quality_control": 207
290
291 ### values for QC table:
292 maxfeatures = length(features(msidata)) 208 maxfeatures = length(features(msidata))
293 medianpeaks = median(colSums(spectra(msidata)[]>0)) 209 medianpeaks = median(colSums(spectra(msidata)[]>0))
294 medint = round(median(spectra(msidata)[]), digits=2) 210 medint = round(median(spectra(msidata)[]), digits=2)
295 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 211 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
296 aligned= c(maxfeatures, medianpeaks, medint, TICs) 212 aligned= c(maxfeatures, medianpeaks, medint, TICs)
297 QC_numbers= cbind(QC_numbers, aligned) 213 QC_numbers= cbind(QC_numbers, aligned)
298
299 ### preparation for QC plots
300 vectorofactions = append(vectorofactions, "aligned") 214 vectorofactions = append(vectorofactions, "aligned")
301
302 for (calibrant in inputcalibrants)
303 {currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
304 par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
305 scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="aligned")
306 assign(paste("aligned",calibrant, sep="_"), currentimage)}
307
308 #end if
309 215
310 ############################### Peak filtering ########################### 216 ############################### Peak filtering ###########################
311 217
312 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering': 218 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering':
313 print('Peak_filtering') 219 print('Peak_filtering')
314 220
315 msidata = peakFilter(msidata, method='freq', freq.min = $method.methods_conditional.frequ_filtering) 221 msidata = peakFilter(msidata, method='freq', freq.min = $method.methods_conditional.frequ_filtering)
316 222
317 ############################### optional QC ########################### 223 ############################### QC ###########################
318 224
319 #if $outputs.outputs_select == "quality_control":
320
321 ### values for QC table:
322 maxfeatures = length(features(msidata)) 225 maxfeatures = length(features(msidata))
323 medianpeaks = median(colSums(spectra(msidata)[]>0)) 226 medianpeaks = median(colSums(spectra(msidata)[]>0))
324 medint = round(median(spectra(msidata)[]), digits=2) 227 medint = round(median(spectra(msidata)[]), digits=2)
325 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 228 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
326 filtered= c(maxfeatures, medianpeaks, medint, TICs) 229 filtered= c(maxfeatures, medianpeaks, medint, TICs)
327 QC_numbers= cbind(QC_numbers, filtered) 230 QC_numbers= cbind(QC_numbers, filtered)
328
329 ### preparation for QC plots
330 vectorofactions = append(vectorofactions, "filtered") 231 vectorofactions = append(vectorofactions, "filtered")
331
332 for (calibrant in inputcalibrants)
333 {currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
334 par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
335 scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="filtered")
336 assign(paste("filtered",calibrant, sep="_"), currentimage)}
337
338 #end if
339 232
340 ############################### Data reduction ########################### 233 ############################### Data reduction ###########################
341 234
342 #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction': 235 #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction':
343 print('Data_reduction') 236 print('Data_reduction')
366 peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata') 259 peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata')
367 260
368 #end if 261 #end if
369 262
370 msidata = reduceDimension(msidata, method="peaks", ref=peak_reference, type="$method.methods_conditional.methods_for_reduction.peaks_type") 263 msidata = reduceDimension(msidata, method="peaks", ref=peak_reference, type="$method.methods_conditional.methods_for_reduction.peaks_type")
371
372 #end if 264 #end if
373 265 ############################### QC ###########################
374 ############################### optional QC ########################### 266
375
376 #if $outputs.outputs_select == "quality_control":
377
378 ### values for QC table:
379 maxfeatures = length(features(msidata)) 267 maxfeatures = length(features(msidata))
380 medianpeaks = median(colSums(spectra(msidata)[]>0)) 268 medianpeaks = median(colSums(spectra(msidata)[]>0))
381 medint = round(median(spectra(msidata)[]), digits=2) 269 medint = round(median(spectra(msidata)[]), digits=2)
382 TICs = round(mean(colSums(spectra(msidata)[])), digits=1) 270 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
383 reduced= c(maxfeatures, medianpeaks, medint, TICs) 271 reduced= c(maxfeatures, medianpeaks, medint, TICs)
384 QC_numbers= cbind(QC_numbers, reduced) 272 QC_numbers= cbind(QC_numbers, reduced)
385
386 ### preparation for QC plots
387 vectorofactions = append(vectorofactions, "reduced") 273 vectorofactions = append(vectorofactions, "reduced")
388 274
389 for (calibrant in inputcalibrants)
390 {currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
391 par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
392 scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="reduced")
393 assign(paste("reduced",calibrant, sep="_"), currentimage)}
394
395 #end if
396
397 ############################### Transformation ########################### 275 ############################### Transformation ###########################
398 276
399 ####elif str( $method.methods_conditional.preprocessing_method) == 'Transformation': 277 #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation':
400 ###print('Transformation') 278 print('Transformation')
401 279
402 ####if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2': 280 #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2':
403 ####print('log2 transformation') 281 print('log2 transformation')
404 282
405 ###spectra(msidata)[spectra(msidata) ==0] = NA 283 spectra(msidata)[][spectra(msidata)[] ==0] = NA
406 ###print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata))))) 284 print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata)[]))))
407 ###spectra(msidata) = log2(spectra(msidata)) 285 spectra(msidata)[] = log2(spectra(msidata)[])
408 286
409 ####elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt': 287 #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt':
410 ###print('squareroot transformation') 288 print('squareroot transformation')
411 289
412 ###spectra(msidata) = sqrt(spectra(msidata)) 290 spectra(msidata)[] = sqrt(spectra(msidata)[])
413 291
414 ###end if 292 #end if
415 293
416 ############################### optional QC ########################### 294 ############################### QC ###########################
417 295
418 #if $outputs.outputs_select == "quality_control":
419
420 ### values for QC table:
421 maxfeatures = length(features(msidata)) 296 maxfeatures = length(features(msidata))
422 medianpeaks = median(colSums(spectra(msidata)[]>0), na.rm=TRUE) 297 medianpeaks = median(colSums(spectra(msidata)[]>0), na.rm=TRUE)
423 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) 298 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2)
424 TICs = round(mean(colSums(spectra(msidata)[]), na.rm=TRUE), digits=1) 299 TICs = round(mean(colSums(spectra(msidata)[]), na.rm=TRUE), digits=1)
425 transformed= c(maxfeatures, medianpeaks, medint, TICs) 300 transformed= c(maxfeatures, medianpeaks, medint, TICs)
426 QC_numbers= cbind(QC_numbers, transformed) 301 QC_numbers= cbind(QC_numbers, transformed)
427
428 ### preparation for QC plots
429 vectorofactions = append(vectorofactions, "transformed") 302 vectorofactions = append(vectorofactions, "transformed")
430 303
431 for (calibrant in inputcalibrants)
432 {currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
433 par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
434 scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="transformed")
435 assign(paste("transformed",calibrant, sep="_"), currentimage)}
436
437 #end if 304 #end if
438
439 ############################### optional QC ###########################
440
441 #if $outputs.outputs_select == "quality_control":
442
443 ### values for QC table:
444 maxfeatures = length(features(msidata))
445 medianpeaks = median(colSums(spectra(msidata)[]>0))
446 medint = round(median(spectra(msidata)[]), digits=2)
447 TICs = round(mean(colSums(spectra(msidata)[])), digits=1)
448 sample_norm= c(maxfeatures, medianpeaks, medint, TICs)
449 QC_numbers= cbind(QC_numbers, sample_norm)
450
451 ### preparation for QC plots
452 vectorofactions = append(vectorofactions, "sample_norm")
453
454 for (calibrant in inputcalibrants)
455 {currentimage = image(msidata , mz=calibrant, strip = strip.custom(bg="lightgrey",
456 par.strip.text=list(col="black", cex=.9)),lattice=TRUE,
457 scales = list(draw = FALSE), plusminus = $outputs.plusminus_dalton, main="sample normalized")
458 assign(paste("sample_norm",calibrant, sep="_"), currentimage)}
459
460 #end if
461
462 #end if
463 #end for 305 #end for
464 306
465 ###################### Outputs: RData, tabular and QC report ################### 307 ############# Outputs: summary matrix, RData, tabular and QC report ############
466 ############################################################################### 308 ################################################################################
309 ## optional summarized matrix
310 print('Summarized matrix')
311
312 #if "mean" in str($summary_type).split(","):
313 print("mean matrix")
314 ## per-m/z mean over pixels: one column per combined_sample level, otherwise a single column for the whole input file
315 if (!is.null(levels(msidata\$combined_sample))){
316 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
317 for (subsample in levels(msidata\$combined_sample)){
318 subsample_pixels = msidata[,msidata\$combined_sample == subsample]
319 subsample_calc = apply(spectra(subsample_pixels)[],1,mean, na.rm=TRUE)
320 sample_matrix = cbind(sample_matrix, subsample_calc)
321 }
322
323 rownames(sample_matrix) = mz(msidata)
324 colnames(sample_matrix) = levels(msidata\$combined_sample)
325 write.table(sample_matrix, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
326 }else{
327 ## apply() yields a plain vector here; it is wrapped in a data.frame so that row and column names can be attached
328 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,mean, na.rm=TRUE))
329 rownames(full_sample_calc) = mz(msidata)
330 colnames(full_sample_calc) = "$infile.display_name"
331 write.table(full_sample_calc, file="$summarized_output_mean", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
332 }
333
334 #end if
335
336 #if "median" in str($summary_type).split(","):
337 print("median matrix")
338 ## per-m/z median over pixels: one column per combined_sample level, otherwise a single column for the whole input file
339 if (!is.null(levels(msidata\$combined_sample))){
340 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
341 for (subsample in levels(msidata\$combined_sample)){
342 subsample_pixels = msidata[,msidata\$combined_sample == subsample]
343 subsample_calc = apply(spectra(subsample_pixels)[],1,median, na.rm=TRUE)
344 sample_matrix = cbind(sample_matrix, subsample_calc)
345 }
346
347 rownames(sample_matrix) = mz(msidata)
348 colnames(sample_matrix) = levels(msidata\$combined_sample)
349 write.table(sample_matrix, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
350 }else{
351 ## apply() yields a plain vector here; it is wrapped in a data.frame (as in the mean section) so that row and column names can be attached
352 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,median, na.rm=TRUE))
353 rownames(full_sample_calc) = mz(msidata)
354 colnames(full_sample_calc) = "$infile.display_name"
355 write.table(full_sample_calc, file="$summarized_output_median", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
356 }
357 #end if
358
359 #if "sd" in str($summary_type).split(","):
360 print("sd matrix")
361 ## per-m/z standard deviation over pixels: one column per combined_sample level, otherwise a single column for the whole input file
362 if (!is.null(levels(msidata\$combined_sample))){
363 sample_matrix = matrix(,ncol=0, nrow=nrow(msidata))
364 for (subsample in levels(msidata\$combined_sample)){
365 subsample_pixels = msidata[,msidata\$combined_sample == subsample]
366 subsample_calc = apply(spectra(subsample_pixels)[],1,sd, na.rm=TRUE)
367 sample_matrix = cbind(sample_matrix, subsample_calc)
368 }
369
370 rownames(sample_matrix) = mz(msidata)
371 colnames(sample_matrix) = levels(msidata\$combined_sample)
372 write.table(sample_matrix, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
373 }else{
374 ## apply() yields a plain vector here; it is wrapped in a data.frame (as in the mean section) so that row and column names can be attached
375 full_sample_calc = as.data.frame(apply(spectra(msidata)[],1,sd, na.rm=TRUE))
376 rownames(full_sample_calc) = mz(msidata)
377 colnames(full_sample_calc) = "$infile.display_name"
378 write.table(full_sample_calc, file="$summarized_output_sd", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
379 }
380 #end if
467 381
468 ## save as (.RData) 382 ## save as (.RData)
469 save(msidata, file="$msidata_preprocessed") 383 save(msidata, file="$msidata_preprocessed")
470 384
471 print(paste0("Number of NAs in intensity matrix: ", sum(is.na(spectra(msidata))))) 385 print(paste0("Number of NAs in intensity matrix: ", sum(is.na(spectra(msidata)[]))))
472 386
473 ## save output matrix 387 ## save output matrix
474 #if $output_matrix: 388 #if $output_matrix:
475
476 389
477 if (length(features(msidata))> 0) 390 if (length(features(msidata))> 0)
478 { 391 {
479 ## save as intensity matrix 392 ## save as intensity matrix
480 spectramatrix = spectra(msidata) 393 spectramatrix = spectra(msidata)[]
481 rownames(spectramatrix) = mz(msidata) 394 rownames(spectramatrix) = mz(msidata)
482 newmatrix = rbind(pixels(msidata), spectramatrix) 395 newmatrix = rbind(pixels(msidata), spectramatrix)
483 write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") 396 write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t")
484 397
485 }else{ 398 }else{
488 } 401 }
489 402
490 #end if 403 #end if
491 404
492 ## save QC report 405 ## save QC report
493 #if $outputs.outputs_select == "quality_control": 406
494 407 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
408 plot(0,type='n',axes=FALSE,ann=FALSE)
409 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name"))
495 rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC") 410 rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC")
496 grid.table(t(QC_numbers)) 411 grid.table(t(QC_numbers))
497
498 for (calibrant in inputcalibrants)
499 {imagelist = list()
500 for (numberprepro in 1:length(vectorofactions)){
501 imagelist[[numberprepro]] = get(paste(vectorofactions[numberprepro],calibrant, sep="_"))}
502 do.call(grid.arrange,imagelist)}
503
504 dev.off() 412 dev.off()
505
506 #end if
507
508 413
509 ]]></configfile> 414 ]]></configfile>
510 </configfiles> 415 </configfiles>
511 <inputs> 416 <inputs>
512 <param name="infile" type="data" format="imzml,rdata,danalyze75" 417 <param name="infile" type="data" format="imzml,rdata,danalyze75"
513 label="MSI rawdata as imzml, analyze7.5 or Cardinal MSImageSet saved as RData" 418 label="MSI data as imzml, analyze7.5 or Cardinal MSImageSet saved as RData"
514 help="load imzml and ibd file by uploading composite datatype imzml"/> 419 help="load imzml and ibd file by uploading composite datatype imzml"/>
420 <param name="accuracy" type="float" value="50" label="Only for processed imzML files: enter mass accuracy to which the m/z values will be binned" help="This should be set to the native accuracy of the mass spectrometer, if known"/>
421 <param name="units" display="radio" type="select" label="Only for processed imzML files: unit of the mass accuracy" help="either m/z or ppm">
422 <option value="mz" >mz</option>
423 <option value="ppm" selected="True" >ppm</option>
424 </param>
515 <repeat name="methods" title="Preprocessing" min="1" max="50"> 425 <repeat name="methods" title="Preprocessing" min="1" max="50">
516 <conditional name="methods_conditional"> 426 <conditional name="methods_conditional">
517 <param name="preprocessing_method" type="select" label="Select the preprocessing methods you want to apply"> 427 <param name="preprocessing_method" type="select" label="Select the preprocessing methods you want to apply">
518 <option value="Normalization" selected="True">Normalization to TIC</option> 428 <option value="Normalization" selected="True">Normalization to TIC</option>
519 <option value="Baseline_reduction">Baseline Reduction</option> 429 <option value="Baseline_reduction">Baseline Reduction</option>
520 <option value="Smoothing">Peak smoothing</option> 430 <option value="Smoothing">Peak smoothing</option>
521 <option value="Peak_picking">Peak picking</option> 431 <option value="Peak_picking">Peak picking</option>
522 <option value="Peak_alignment">Peak alignment</option> 432 <option value="Peak_alignment">Peak alignment</option>
523 <option value="Peak_filtering">Peak filtering</option> 433 <option value="Peak_filtering">Peak filtering</option>
524 <option value="Data_reduction">Data reduction</option> 434 <option value="Data_reduction">Data reduction</option>
525 <!--option value="Transformation">Transformation</option--> 435 <option value="Transformation">Transformation</option>
526
527 </param> 436 </param>
528 <when value="Normalization"/> 437 <when value="Normalization"/>
529 <when value="Baseline_reduction"> 438 <when value="Baseline_reduction">
530 <param name="blocks_baseline" type="integer" value="50" 439 <param name="blocks_baseline" type="integer" value="50"
531 label="Blocks"/> 440 label="Blocks"/>
667 </when> 576 </when>
668 </conditional> 577 </conditional>
669 </when> 578 </when>
670 </conditional> 579 </conditional>
671 </when> 580 </when>
672 <!--when value="Transformation"> 581 <when value="Transformation">
673 <conditional name="transf_conditional"> 582 <conditional name="transf_conditional">
674 <param name="trans_type" type="select" label="Choose which intensity transformation you want to apply" help="logarithm base 2 (log2) or square root (sqrt)"> 583 <param name="trans_type" type="select" label="Choose which intensity transformation you want to apply" help="logarithm base 2 (log2) or square root (sqrt)">
675 <option value="log2" selected="True">log2</option> 584 <option value="log2" selected="True">log2</option>
676 <option value="sqrt">sqrt</option> 585 <option value="sqrt">sqrt</option>
677 </param> 586 </param>
678 <when value="log2"/> 587 <when value="log2"/>
679 <when value="sqrt"/> 588 <when value="sqrt"/>
680 </conditional> 589 </conditional>
681 </when--> 590 </when>
682 </conditional> 591 </conditional>
683 </repeat> 592 </repeat>
684 <conditional name="outputs"> 593 <param name="summary_type" type="select" display="checkboxes" multiple="true" label="Summarize all pixels of a sample and calculate the mean, median or standard deviation">
685 <param name="outputs_select" type="select" label="Quality control output"> 594 <option value="mean">mean</option>
686 <option value="quality_control" selected="True">yes</option> 595 <option value="median">median</option>
687 <option value="no_quality_control">no</option> 596 <option value="sd">standard deviation</option>
688 </param> 597 </param>
689 <when value="quality_control"> 598 <param name="output_matrix" type="boolean" label="Intensity matrix output"/>
690 <param name="calibrant_file" type="data" format="tabular"
691 label="Provide a list of m/z, which will be plotted in the quality control report"
692 help="Use internal calibrant m/z"/>
693 <param name="calibrants_column" data_ref="calibrant_file" label="Column with m/z" type="data_column"/>
694 <param name="plusminus_dalton" value="0.25" type="text" label="M/z range" help="Plusminus m/z window in Dalton"/>
695 </when>
696 <when value="no_quality_control"/>
697 </conditional>
698 <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/>
699 </inputs> 599 </inputs>
700 <outputs> 600 <outputs>
701 <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/> 601 <data format="rdata" name="msidata_preprocessed" label="$infile.display_name preprocessed"/>
702 <data format="pdf" name="QC_plots" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"> 602 <data format="pdf" name="QC_plots" from_work_dir="Preprocessing.pdf" label = "$infile.display_name preprocessed_QC"/>
703 <filter>outputs["outputs_select"] == "quality_control"</filter> 603 <data format="tabular" name="summarized_output_mean" label="$infile.display_name mean_matrix">
604 <filter>summary_type and "mean" in summary_type</filter>
605 </data>
606 <data format="tabular" name="summarized_output_median" label="$infile.display_name median_matrix">
607 <filter>summary_type and "median" in summary_type</filter>
608 </data>
609 <data format="tabular" name="summarized_output_sd" label="$infile.display_name sd_matrix">
610 <filter>summary_type and "sd" in summary_type</filter>
704 </data> 611 </data>
705 <data format="tabular" name="matrixasoutput" label="$infile.display_name preprocessed_matrix"> 612 <data format="tabular" name="matrixasoutput" label="$infile.display_name preprocessed_matrix">
706 <filter>output_matrix</filter> 613 <filter>output_matrix</filter>
707 </data> 614 </data>
708 </outputs> 615 </outputs>
709 <tests> 616 <tests>
710 <test expect_num_outputs="2"> 617 <test expect_num_outputs="3">
711 <param name="infile" value="" ftype="imzml"> 618 <param name="infile" value="" ftype="imzml">
712 <composite_data value="Example_Continuous.imzML"/> 619 <composite_data value="Example_Continuous.imzML"/>
713 <composite_data value="Example_Continuous.ibd"/> 620 <composite_data value="Example_Continuous.ibd"/>
714 </param> 621 </param>
715 <repeat name="methods"> 622 <repeat name="methods">
747 <conditional name="methods_conditional"> 654 <conditional name="methods_conditional">
748 <param name="preprocessing_method" value="Peak_filtering"/> 655 <param name="preprocessing_method" value="Peak_filtering"/>
749 <param name="frequ_filtering" value="2"/> 656 <param name="frequ_filtering" value="2"/>
750 </conditional> 657 </conditional>
751 </repeat> 658 </repeat>
752 <!--repeat name="methods"> 659 <repeat name="methods">
753 <conditional name="methods_conditional"> 660 <conditional name="methods_conditional">
754 <param name="preprocessing_method" value="Transformation"/> 661 <param name="preprocessing_method" value="Transformation"/>
755 <conditional name="transf_conditional"> 662 <conditional name="transf_conditional">
756 <param name="trans_type" value="sqrt"/> 663 <param name="trans_type" value="sqrt"/>
757 </conditional> 664 </conditional>
758 </conditional> 665 </conditional>
759 </repeat--> 666 </repeat>
760 <param name="outputs_select" value="no_quality_control"/>
761 <param name="output_matrix" value="True"/> 667 <param name="output_matrix" value="True"/>
762 <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/> 668 <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/>
763 <output name="matrixasoutput" file="preprocessing_results1.txt"/> 669 <output name="matrixasoutput" file="preprocessing_results1.txt"/>
670 <output name="QC_plots" file="preprocessing_results1.pdf" compare="sim_size"/>
764 </test> 671 </test>
765 <test expect_num_outputs="3"> 672 <test expect_num_outputs="4">
766 <param name="infile" value="preprocessed.RData" ftype="rdata"/> 673 <param name="infile" value="123_combined.RData" ftype="rdata"/>
767 <repeat name="methods"> 674 <repeat name="methods">
768 <conditional name="methods_conditional"> 675 <conditional name="methods_conditional">
769 <param name="preprocessing_method" value="Peak_picking"/> 676 <param name="preprocessing_method" value="Peak_picking"/>
770 <param name="blocks_picking" value="3"/> 677 <param name="blocks_picking" value="3"/>
771 <param name="window_picking" value="5"/> 678 <param name="window_picking" value="5"/>
781 <conditional name="methods_for_alignment"> 688 <conditional name="methods_for_alignment">
782 <param name="alignment_method" value="DP"/> 689 <param name="alignment_method" value="DP"/>
783 </conditional> 690 </conditional>
784 </conditional> 691 </conditional>
785 </repeat> 692 </repeat>
786 <param name="outputs_select" value="quality_control"/> 693 <param name="summary_type" value="median,sd"/>
787 <param name="calibrant_file" ftype="tabular" value="inputcalibrantfile1.tabular"/>
788 <param name="calibrants_column" value="1"/>
789 <param name="plusminus_dalton" value="0.25"/>
790 <param name="output_matrix" value="True"/>
791 <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/> 694 <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/>
792 <output name="matrixasoutput" file="preprocessing_results2.txt" lines_diff="2"/> 695 <output name="summarized_output_median" file="preprocessing_median2.txt" lines_diff="2"/>
696 <output name="summarized_output_sd" file="preprocessing_sd2.txt" lines_diff="2"/>
793 <output name="QC_plots" file="preprocessing_results2.pdf" compare="sim_size"/> 697 <output name="QC_plots" file="preprocessing_results2.pdf" compare="sim_size"/>
794 </test> 698 </test>
795 <test expect_num_outputs="2"> 699 <test expect_num_outputs="3">
796 <param name="infile" value="" ftype="analyze75"> 700 <param name="infile" value="" ftype="analyze75">
797 <composite_data value="Analyze75.hdr"/> 701 <composite_data value="Analyze75.hdr"/>
798 <composite_data value="Analyze75.img"/> 702 <composite_data value="Analyze75.img"/>
799 <composite_data value="Analyze75.t2m"/> 703 <composite_data value="Analyze75.t2m"/>
800 </param> 704 </param>
817 <conditional name="methods_for_alignment"> 721 <conditional name="methods_for_alignment">
818 <param name="alignment_method" value="diff"/> 722 <param name="alignment_method" value="diff"/>
819 </conditional> 723 </conditional>
820 </conditional> 724 </conditional>
821 </repeat> 725 </repeat>
822 <param name="outputs_select" value="quality_control"/> 726 <param name="summary_type" value="mean"/>
823 <param name="calibrant_file" ftype="tabular" value="inputcalibrantfile2.tabular"/>
824 <param name="calibrants_column" value="1"/>
825 <param name="plusminus_dalton" value="0.25"/>
826 <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/> 727 <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/>
827 <output name="QC_plots" file="preprocessing_results3.pdf" compare="sim_size"/> 728 <output name="QC_plots" file="preprocessing_results3.pdf" compare="sim_size"/>
729 <output name="summarized_output_mean" file="preprocessing_mean3.txt" lines_diff="2"/>
828 </test> 730 </test>
829 <test expect_num_outputs="2"> 731 <test expect_num_outputs="3">
830 <param name="infile" value="" ftype="analyze75"> 732 <param name="infile" value="" ftype="analyze75">
831 <composite_data value="Analyze75.hdr"/> 733 <composite_data value="Analyze75.hdr"/>
832 <composite_data value="Analyze75.img"/> 734 <composite_data value="Analyze75.img"/>
833 <composite_data value="Analyze75.t2m"/> 735 <composite_data value="Analyze75.t2m"/>
834 </param> 736 </param>
841 <conditional name="methods_conditional"> 743 <conditional name="methods_conditional">
842 <param name="preprocessing_method" value="Data_reduction"/> 744 <param name="preprocessing_method" value="Data_reduction"/>
843 <param name="bin_width" value="0.1"/> 745 <param name="bin_width" value="0.1"/>
844 </conditional> 746 </conditional>
845 </repeat> 747 </repeat>
846 <param name="outputs_select" value="no_quality_control"/>
847 <param name="output_matrix" value="True"/> 748 <param name="output_matrix" value="True"/>
848 <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/> 749 <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/>
849 <output name="matrixasoutput" file="preprocessing_results4.txt"/> 750 <output name="matrixasoutput" file="preprocessing_results4.txt"/>
751 <output name="QC_plots" file="preprocessing_results4.pdf" compare="sim_size"/>
850 </test> 752 </test>
851 <test expect_num_outputs="3"> 753 <test expect_num_outputs="2">
852 <param name="infile" value="" ftype="imzml"> 754 <param name="infile" value="" ftype="imzml">
853 <composite_data value="Example_Continuous.imzML"/> 755 <composite_data value="Example_Continuous.imzML"/>
854 <composite_data value="Example_Continuous.ibd"/> 756 <composite_data value="Example_Continuous.ibd"/>
855 </param> 757 </param>
856 <repeat name="methods"> 758 <repeat name="methods">
860 <param name="reduction_method" value="resample"/> 762 <param name="reduction_method" value="resample"/>
861 <param name="step_width" value="0.1"/> 763 <param name="step_width" value="0.1"/>
862 </conditional> 764 </conditional>
863 </conditional> 765 </conditional>
864 </repeat> 766 </repeat>
865 <param name="outputs_select" value="quality_control"/> 767 <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/>
866 <param name="calibrant_file" ftype="tabular" value="inputcalibrantfile1.tabular"/>
867 <param name="calibrants_column" value="1"/>
868 <param name="plusminus_dalton" value="0.25"/>
869 <param name="output_matrix" value="True"/>
870 <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/>
871 <output name="matrixasoutput" file="preprocessing_results5.txt"/>
872 <output name="QC_plots" file="preprocessing_results5.pdf" compare="sim_size"/> 768 <output name="QC_plots" file="preprocessing_results5.pdf" compare="sim_size"/>
873 </test> 769 </test>
874 </tests> 770 </tests>
875 <help> 771 <help>
876 <![CDATA[ 772 <![CDATA[
897 793
898 794
899 Output: 795 Output:
900 796
901 - imzML file, preprocessed 797 - imzML file, preprocessed
902 - optional: pdf with heatmap of m/z of interest after each preprocessing step 798 - pdf with key values after each processing step
903 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns) 799 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns)
904 800
905 Tip: 801 Tip:
906 802
907 - Peak alignment works only after peak picking 803 - Peak alignment works only after peak picking