comparison maldi_quant_peakdetection.xml @ 4:c6b47c89a2f5 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit ecdc3a64aa245d80dbc5487b2bf10a85a43adc6d
author galaxyp
date Fri, 22 Mar 2019 08:30:45 -0400
parents c42549f04fdd
children af766257766e
comparison
equal deleted inserted replaced
3:c42549f04fdd 4:c6b47c89a2f5
1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.3"> 1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.4">
2 <description> 2 <description>
3 Peak detection, binning and filtering for mass-spectrometry imaging data 3 Peak detection, binning and filtering for mass-spectrometry imaging data
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>maldi_macros.xml</import> 6 <import>maldi_macros.xml</import>
126 title(main=paste("$filename")) 126 title(main=paste("$filename"))
127 127
128 ## plot input file spectrum: 128 ## plot input file spectrum:
129 #if $centroids: 129 #if $centroids:
130 ## Choose random spectra for QC plots 130 ## Choose random spectra for QC plots
131 print(length(peaks))
131 random_spectra = sample(1:length(peaks), 4, replace=FALSE) 132 random_spectra = sample(1:length(peaks), 4, replace=FALSE)
133 random_spectra_name = pixelnames[random_spectra]
132 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 134 par(mfrow = c(2, 2), oma=c(0,0,2,0))
133 for (random_sample in random_spectra){ 135 for (random_sample in random_spectra){
134 plot(peaks[[random_sample]],sub="", main=paste0("spectrum ", random_sample))} 136 plot(peaks[[random_sample]],sub="", main=paste0("spectrum ", pixelnames[random_sample]))}
135 title("Input spectra", outer=TRUE, line=0) 137 title("Input spectra", outer=TRUE, line=0)
136 138
137 #else 139 #else
138 ## Choose random spectra for QC plots 140 ## Choose random spectra for QC plots
139 random_spectra = sample(1:length(maldi_data), 4, replace=FALSE) 141 random_spectra = sample(1:length(maldi_data), 4, replace=FALSE)
140 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 142 par(mfrow = c(2, 2), oma=c(0,0,2,0))
141 for (random_sample in random_spectra){ 143 for (random_sample in random_spectra){
142 plot(maldi_data[[random_sample]],sub="", main=paste0("spectrum ", random_sample))} 144 plot(maldi_data[[random_sample]],sub="", main=paste0("spectrum ", pixelnames[random_sample]))
145 }
143 title("Input spectra", outer=TRUE, line=0) 146 title("Input spectra", outer=TRUE, line=0)
144 #end if 147 #end if
145 148
146 149
147 ## QC numbers for input file 150 ## QC numbers for input file
221 ## QC plot and numbers 224 ## QC plot and numbers
222 ## plot old spectra with baseline in blue and picked peaks in green 225 ## plot old spectra with baseline in blue and picked peaks in green
223 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 226 par(mfrow = c(2, 2), oma=c(0,0,2,0))
224 for (random_sample in random_spectra){ 227 for (random_sample in random_spectra){
225 noise = estimateNoise(maldi_data[[random_sample]], method= "$method.methods_conditional.peak_method") 228 noise = estimateNoise(maldi_data[[random_sample]], method= "$method.methods_conditional.peak_method")
226 plot(maldi_data[[random_sample]], sub="", main=paste0("spectrum ", random_sample)) 229 plot(maldi_data[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))
227 lines(noise[,1], noise[,2]*$method.methods_conditional.snr, col="blue") 230 lines(noise[,1], noise[,2]*$method.methods_conditional.snr, col="blue")
228 points(peaks[[random_sample]], col="green", pch=20)} 231 points(peaks[[random_sample]], col="green", pch=20)}
229 title("S/N in blue and picked peaks in green", outer=TRUE, line=0) 232 title("S/N in blue and picked peaks in green", outer=TRUE, line=0)
230 233
231 ## plot new spectrum 234 ## plot new spectrum
232 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 235 par(mfrow = c(2, 2), oma=c(0,0,2,0))
233 for (random_sample in random_spectra){ 236 for (random_sample in random_spectra){
234 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))} 237 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))}
235 title("Picked peaks", outer=TRUE, line=0) 238 title("Picked peaks", outer=TRUE, line=0)
236 239
237 pixel_number = length(peaks) 240 pixel_number = length(peaks)
238 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 241 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
239 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 242 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
266 ## keep peaks to plot them with monoisotopic peaks 269 ## keep peaks to plot them with monoisotopic peaks
267 picked_peaks = peaks 270 picked_peaks = peaks
268 271
269 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, 272 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor,
270 tolerance=$method.methods_conditional.tolerance, 273 tolerance=$method.methods_conditional.tolerance,
271 distance=$method.methods_conditional.distance, 274 distance=c($method.methods_conditional.distance),
272 size=$method.methods_conditional.size) 275 size=$method.methods_conditional.size)
273 276
274 ## plot old spectrum with picked isotopes as green dots 277 ## plot old spectrum with picked isotopes as green dots
275 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 278 par(mfrow = c(2, 2), oma=c(0,0,2,0))
276 for (random_sample in random_spectra){ 279 for (random_sample in random_spectra){
277 plot(picked_peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample)) 280 plot(picked_peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))
278 points(peaks[[random_sample]], col="green", pch=20)} 281 points(peaks[[random_sample]], col="green", pch=20)}
279 title(paste0("Monoisotopic peaks in green"), outer=TRUE, line=0) 282 title(paste0("Monoisotopic peaks in green"), outer=TRUE, line=0)
280 283
281 284
282 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 285 par(mfrow = c(2, 2), oma=c(0,0,2,0))
283 for (random_sample in random_spectra){ 286 for (random_sample in random_spectra){
284 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))} 287 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))}
285 title("Monoisotopic peaks", outer=TRUE, line=0) 288 title("Monoisotopic peaks", outer=TRUE, line=0)
286 289
287 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 290 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
288 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 291 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
289 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 292 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
363 #end if 366 #end if
364 367
365 ## QC plot and numbers 368 ## QC plot and numbers
366 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 369 par(mfrow = c(2, 2), oma=c(0,0,2,0))
367 for (random_sample in random_spectra){ 370 for (random_sample in random_spectra){
368 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))} 371 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))}
369 title("Aligned spectra", outer=TRUE, line=0) 372 title("Aligned spectra", outer=TRUE, line=0)
370 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 373 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
371 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 374 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
372 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 375 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
373 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 376 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
398 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method") 401 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance, method="$method.methods_conditional.bin_method")
399 402
400 ## QC plot and numbers 403 ## QC plot and numbers
401 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 404 par(mfrow = c(2, 2), oma=c(0,0,2,0))
402 for (random_sample in random_spectra){ 405 for (random_sample in random_spectra){
403 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))} 406 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))}
404 title("Binned spectra", outer=TRUE, line=0) 407 title("Binned spectra", outer=TRUE, line=0)
405 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 408 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
406 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 409 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
407 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 410 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
408 medint =round( median(unlist(lapply(peaks,intensity))), digits=2) 411 medint =round( median(unlist(lapply(peaks,intensity))), digits=2)
449 #end if 452 #end if
450 453
451 ##QC plot and numbers 454 ##QC plot and numbers
452 par(mfrow = c(2, 2), oma=c(0,0,2,0)) 455 par(mfrow = c(2, 2), oma=c(0,0,2,0))
453 for (random_sample in random_spectra){ 456 for (random_sample in random_spectra){
454 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", random_sample))} 457 plot(peaks[[random_sample]], sub="", main=paste0("spectrum ", pixelnames[random_sample]))}
455 title("Filtered spectra", outer=TRUE, line=0) 458 title("Filtered spectra", outer=TRUE, line=0)
456 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) 459 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
457 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) 460 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
458 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) 461 mean_features = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
459 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) 462 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
548 </conditional> 551 </conditional>
549 <repeat name="methods" title="Method" min="1"> 552 <repeat name="methods" title="Method" min="1">
550 <conditional name="methods_conditional"> 553 <conditional name="methods_conditional">
551 <param name="method" type="select" label="Select a method"> 554 <param name="method" type="select" label="Select a method">
552 <option value="Peak_detection">Peak detection</option> 555 <option value="Peak_detection">Peak detection</option>
553 <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option> 556 <option value="monoisotopic_peaks">Monoisotopic peaks</option>
554 <option value="Align">Align Spectra (warping/phase correction)</option> 557 <option value="Align">Align Spectra (warping/phase correction)</option>
555 <option value="Binning">Binning</option> 558 <option value="Binning">Binning</option>
556 <option value="Filtering">Filtering</option> 559 <option value="Filtering">Filtering</option>
557 </param> 560 </param>
558 <when value="Peak_detection"> 561 <when value="Peak_detection">
569 <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio" help=""/> 572 <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio" help=""/>
570 <param name="use_annotations" type="boolean" label="Detect peaks on average mass spectra" help="Spectra with same annotation are averaged before peak detection, no imzML output" truevalue="TRUE" falsevalue="FALSE"/> 573 <param name="use_annotations" type="boolean" label="Detect peaks on average mass spectra" help="Spectra with same annotation are averaged before peak detection, no imzML output" truevalue="TRUE" falsevalue="FALSE"/>
571 </when> 574 </when>
572 <when value="monoisotopic_peaks"> 575 <when value="monoisotopic_peaks">
573 <param name="minCor" type="float" value="0.95" label="Minimal correlation" 576 <param name="minCor" type="float" value="0.95" label="Minimal correlation"
574 help="Minimal correlation between the peak pattern generated by the model and the experimental peaks in the MassPeaks object to be recognized as isotopic pattern"/> 577 help="Minimal correlation between the peak pattern generated by the model and the experimental peaks in the MassPeaks object to be recognized as isotopic pattern."/>
575 <param name="tolerance" type="float" label="Tolerance" value="0.00005" 578 <param name="tolerance" type="float" label="Tolerance" value="0.0001"
576 help="Maximal relative deviation of a peak position (m/z) to be considered as identical: abs(((mass[i]+distance)-mass[i+1])/mass[i]) smaller than 'tolerance'. For 50ppm use 0.00005 or 50e-6" /> 579 help="Maximal relative deviation of a peak position (m/z) to be considered as identical: abs(((mass[i]+distance)-mass[i+1])/mass[i]) smaller than 'tolerance'. For 100ppm use 0.0001" />
577 <param name="distance" type="float" label="Distance" value="1.00235" help="Distance between two consecutive peaks in an isotopic pattern. 1.00235 is average distance for polypeptides."/> 580 <param name="distance" type="text" label="Distance" value="1.00235" help="Distance between two consecutive peaks in an isotopic pattern. 1.00235 is average distance for polypeptides. Multiple values can be used to find multiple charged pattern e.g. 1, 0.5 ,0.33">
578 <param name="size" type="integer" label="Size" value="3" help="Size (length) of isotopic pattern, longer patterns are prefered over shorter ones, min size is 2."/> 581 <sanitizer invalid_char="">
582 <valid initial="string.digits">
583 <add value="," />
584 <add value=":" />
585 <add value="." />
586 </valid>
587 </sanitizer>
588 </param>
589 <param name="size" type="text" label="Size" value="3:10" help="Size (length) of isotopic pattern, longer patterns are prefered over shorter ones, min size is 2, a range can be used.">
590 <sanitizer invalid_char="">
591 <valid initial="string.digits">
592 <add value=":" />
593 </valid>
594 </sanitizer>
595 </param>
579 </when> 596 </when>
580 597
581 <when value="Align"> 598 <when value="Align">
582 <param name="warping_method" type="select" label="Warping methods"> 599 <param name="warping_method" type="select" label="Warping methods">
583 <option value="lowess" selected="True">Lowess</option> 600 <option value="lowess" selected="True">Lowess</option>
612 </when> 629 </when>
613 630
614 <when value="Binning"> 631 <when value="Binning">
615 <param name="bin_tolerance" type="float" value="0.002" label="Tolerance" 632 <param name="bin_tolerance" type="float" value="0.002" label="Tolerance"
616 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/> 633 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/>
617 <param name="bin_method" display="radio" type="select" label="Bin creation rule" help="strict - creates bins never containing two or more peaks of the sampe sample. relaxed - allows multiple peaks of the same sample in one bin."> 634 <param name="bin_method" display="radio" type="select" label="Bin creation rule" help="strict - creates bins never containing two or more peaks of the same sample. relaxed - allows multiple peaks of the same sample in one bin.">
618 <option value="strict" selected="True" >strict</option> 635 <option value="strict" selected="True" >strict</option>
619 <option value="relaxed" >relaxed</option> 636 <option value="relaxed" >relaxed</option>
620 </param> 637 </param>
621 </when> 638 </when>
622 <when value="Filtering"> 639 <when value="Filtering">
671 <param name="centroids" value="TRUE"/> 688 <param name="centroids" value="TRUE"/>
672 <repeat name="methods"> 689 <repeat name="methods">
673 <conditional name="methods_conditional"> 690 <conditional name="methods_conditional">
674 <param name="method" value="monoisotopic_peaks"/> 691 <param name="method" value="monoisotopic_peaks"/>
675 <param name="tolerance" value="0.0004"/> 692 <param name="tolerance" value="0.0004"/>
693 <param name="size" value="3"/>
676 </conditional> 694 </conditional>
677 </repeat> 695 </repeat>
678 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/> 696 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/>
679 <output name="masspeaks" file="masspeaks2.tabular"/> 697 <output name="masspeaks" file="masspeaks2.tabular"/>
680 <output name="intensity_matrix" file="int2.tabular"/> 698 <output name="intensity_matrix" file="int2.tabular"/>
800 - Detect peaks on average mass spectra: Spectra with the same annotation (taken from the annotation tabular input) are averaged and peak picking is performed on the average spectrum of each annotation group. The exported imzML is empty and cannot be used for further analysis steps. The peaklist and intensity matrix outputs contain the annotation group names with their averaged intensity values. Filtering steps have to be done in the same run as the peak picking. 818 - Detect peaks on average mass spectra: Spectra with the same annotation (taken from the annotation tabular input) are averaged and peak picking is performed on the average spectrum of each annotation group. The exported imzML is empty and cannot be used for further analysis steps. The peaklist and intensity matrix outputs contain the annotation group names with their averaged intensity values. Filtering steps have to be done in the same run as the peak picking.
801 819
802 820
803 - Monoisotopic peaks: Keeps only the monoisotopic peaks 821 - Monoisotopic peaks: Keeps only the monoisotopic peaks
804 822
823 - Based on a Poisson model for isotopic patterns as described in (`Breen et al. <https://doi.org/10.1002/1522-2683(20000601)21:11%3C2243::AID-ELPS2243%3E3.0.CO;2-K>`_)
824 - Isotopic pattern can be characterized and recognized by
825
826 - the similarity of the experimental pattern with the modelled pattern
827 - the distance between consecutive isotopic peaks. For polypeptides the average distance is 1.00235 (`Park et al. <https://pubs.acs.org/doi/abs/10.1021/ac800913b>`_). Multiply charged analytes have smaller distances between the peaks (e.g. z = 1: distance = ~1; z = 2: distance = ~0.5; z = 3: distance = ~0.3333). To search for differently charged isotopic patterns, multiple distances can be applied - the order matters because the first distance that matches is reported (e.g. 1, 0.5, 0.3333).
828 - the size (length) of the pattern; multiple values can be applied, and longer patterns are preferred over shorter ones.
829
805 830
806 - Spectra alignment (warping): alignment for (re)calibration of m/z values. 831 - Spectra alignment (warping): alignment for (re)calibration of m/z values.
807 832
808 - without external reference m/z: internal reference is obtained by filtering (default 90%) and binning the peaks to find landmark peaks and their average m/z 833 - without external reference m/z: internal reference is obtained by filtering (default 90%) and binning the peaks to find landmark peaks and their average m/z
809 - with external reference m/z: the m/z provided in a tabular file are used as a reference, at least 10 reference values are recommended 834 - with external reference m/z: the m/z provided in a tabular file are used as a reference, at least 10 reference values are recommended