changeset 6:c54e7284ff31 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_qualitycontrol commit 06c2b45d8644b1d7fc01622a5c59dcbf8886d0f1
author galaxyp
date Mon, 23 Apr 2018 17:17:30 -0400
parents ae98f109ba68
children ae9d7703ff27
files msi_qualitycontrol.xml test-data/LM8_file16output.pdf test-data/Testfile_qualitycontrol_analyze75.pdf test-data/Testfile_qualitycontrol_imzml.pdf test-data/Testfile_qualitycontrol_rdata.pdf
diffstat 5 files changed, 67 insertions(+), 32 deletions(-) [+]
line wrap: on
line diff
--- a/msi_qualitycontrol.xml	Sun Mar 11 10:37:31 2018 -0400
+++ b/msi_qualitycontrol.xml	Mon Apr 23 17:17:30 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.7.0.4">
+<tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.7.0.5">
     <description>
         mass spectrometry imaging QC
     </description>
@@ -243,11 +243,11 @@
     ## 1) Acquisition image
 
     pixelnumber = 1:pixelcount
-    pixelxyarray=cbind(coord(msidata),pixelnumber)
+    pixelxyarray=cbind(coord(msidata)[,1:2],pixelnumber)
 
     print(ggplot(pixelxyarray, aes(x=x, y=y, fill=pixelnumber))
      + geom_tile() + coord_fixed()
-     + ggtitle("1) Order of Acquisition")
+     + ggtitle("Order of Acquisition")
      +theme_bw()
      + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), 
                             space = "Lab", na.value = "black", name = "Acq"))
@@ -278,12 +278,12 @@
         }
 
         countvector= as.factor(colSums(pixelmatrix>0))
-        countdf= cbind(coord(msidata), countvector)
+        countdf= cbind(coord(msidata)[,1:2], countvector)
         mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrab1", "lightseagreen")
 
         print(ggplot(countdf, aes(x=x, y=y, fill=countvector))
            + geom_tile() + coord_fixed() 
-          + ggtitle("2) Number of calibrants per pixel")
+          + ggtitle("Number of calibrants per pixel")
           + theme_bw() 
           + theme(text=element_text(family="ArialMT", face="bold", size=12))
           + scale_fill_manual(values = mycolours[1:length(countvector)], 
@@ -352,7 +352,7 @@
             mass2vector = spectra(msidata)[features(msidata, mz = maxmass2),]
             foldchange = log2(mass1vector/mass2vector)
 
-            ratiomatrix = cbind(foldchange, coord(msidata))
+            ratiomatrix = cbind(foldchange, coord(msidata)[,1:2])
 
             print(ggplot(ratiomatrix, aes(x=x, y=y, fill=foldchange), colour=colo)
              + geom_tile() + coord_fixed()
@@ -371,7 +371,7 @@
     {   for (mass in 1:length(inputmasses))
         {
           image(msidata, mz=inputmasses[mass], plusminus=$plusminus_dalton, 
-          main= paste0("3", LETTERS[mass], ") ", inputnames[mass], " (", round(inputmasses[mass], digits = 2)," ± ", $plusminus_dalton, " Da)"),
+          main= paste0(inputnames[mass], " (", round(inputmasses[mass], digits = 2)," ± ", $plusminus_dalton, " Da)"),
                 contrast.enhance = "histogram", ylim=c(maximumy+2, 0))
         }
     } else {print("3) The inputpeptide masses were not provided or outside the mass range")}
@@ -380,23 +380,23 @@
     ## 4) Number of peaks per pixel - image
 
     peaksperpixel = colSums(spectra(msidata)[]> 0)
-    peakscoordarray=cbind(coord(msidata), peaksperpixel)
+    peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel)
 
     print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel), colour=colo)
     + geom_tile() + coord_fixed() 
-     + ggtitle("4) Number of peaks per pixel")
+     + ggtitle("Number of peaks per pixel")
      + theme_bw() 
      + theme(text=element_text(family="ArialMT", face="bold", size=12))
      + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") 
                             ,space = "Lab", na.value = "black", name = "# peaks"))
 
     ## 5) TIC image 
-    TICcoordarray=cbind(coord(msidata), TICs)
+    TICcoordarray=cbind(coord(msidata)[,1:2], TICs)
     colo = colorRampPalette(
     c("blue", "cyan", "green", "yellow","red"))
     print(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo)
      + geom_tile() + coord_fixed() 
-     + ggtitle("5) Total Ion Chromatogram")
+     + ggtitle("Total Ion Chromatogram")
      + theme_bw() 
      + theme(text=element_text(family="ArialMT", face="bold", size=12))
      + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange") 
@@ -405,12 +405,12 @@
     ## 6) Most abundant mass image 
 
     highestmz = apply(spectra(msidata)[],2,which.max) 
-    highestmz_matrix = cbind(coord(msidata),mz(msidata)[highestmz])
+    highestmz_matrix = cbind(coord(msidata)[,1:2],mz(msidata)[highestmz])
     colnames(highestmz_matrix)[3] = "highestmzinDa"
 
     print(ggplot(highestmz_matrix, aes(x=x, y=y, fill=highestmzinDa))
     + geom_tile() + coord_fixed() 
-    + ggtitle("6) Most abundant m/z in each pixel")
+    + ggtitle("Most abundant m/z in each pixel")
     + theme_bw() 
     + scale_fill_gradientn(colours = c("blue", "purple" , "red","orange"), space = "Lab", na.value = "black", name = "m/z", 
                            labels = as.character(pretty(highestmz_matrix\$highestmzinDa)[c(1,3,5,7)]),
@@ -427,7 +427,7 @@
     ## 7) pca image for two components
     pca = PCA(msidata, ncomp=2) 
     par(mfrow = c(2,1))
-    plot(pca, col=c("black", "darkgrey"), main="7) PCA for two components")
+    plot(pca, col=c("black", "darkgrey"), main="PCA for two components")
     image(pca, col=c("black", "white"),ylim=c(maximumy+2, 0),  strip=FALSE)
 
 
@@ -437,26 +437,26 @@
     par(mfrow = c(2,1), mar=c(5,6,4,2))
 
     ## 8a) number of peaks per spectrum - scatterplot
-    plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="8a) Number of peaks per spectrum")
+    plot_colorByDensity(pixels(msidata), peaksperpixel, ylab = "", xlab = "", main="Number of peaks per spectrum")
     title(xlab="Spectra index \n (= Acquisition time)", line=3)
     title(ylab="Number of peaks", line=4)
 
     ## 8b) number of peaks per spectrum - histogram
     hist(peaksperpixel, main="", las=1, xlab = "Number of peaks per spectrum", ylab="") 
-    title(main="8b) Number of peaks per spectrum", line=2)
+    title(main="Number of peaks per spectrum", line=2)
     title(ylab="Frequency = # spectra", line=4)
     abline(v=median(peaksperpixel), col="blue")
 
     ## 9a) TIC per spectrum - density scatterplot
     zero=0
     par(mfrow = c(2,1), mar=c(5,6,4,2))
-    plot_colorByDensity(pixels(msidata), TICs,  ylab = "", xlab = "", main="9a) TIC per pixel")
+    plot_colorByDensity(pixels(msidata), TICs,  ylab = "", xlab = "", main="TIC per spectrum")
     title(xlab="Spectra index \n (= Acquisition time)", line=3)
     title(ylab = "Total ion chromatogram intensity", line=4)
 
     ## 9b) TIC per spectrum -  histogram
     hist(log(TICs), main="", las=1, xlab = "log(TIC per spectrum)", ylab="")
-    title(main= "9b) TIC per spectrum", line=2)
+    title(main= "TIC per spectrum", line=2)
     title(ylab="Frequency = # spectra", line=4)
     abline(v=median(log(TICs[TICs>0])), col="blue") 
 
@@ -469,18 +469,18 @@
     peakspermz = rowSums(spectra(msidata)[] > 0 )
 
     par(mfrow = c(2,1), mar=c(5,6,4,4.5))
-    ## 11a) Number of peaks per mz - scatterplot
-    plot_colorByDensity(mz(msidata),peakspermz, main= "11a) Number of peaks for each mz", ylab ="")
+    ## Number of peaks per mz - scatterplot
+    plot_colorByDensity(mz(msidata),peakspermz, main= "Number of peaks per mz", ylab ="")
     title(xlab="mz in Dalton", line=2.5)
     title(ylab = "Number of peaks", line=4)
     axis(4, at=pretty(peakspermz),labels=as.character(round((pretty(peakspermz)/pixelcount*100), digits=1)), las=1)
     mtext("Coverage of spectra [%]", 4, line=3, adj=1)
 
     # make plot smaller to fit axis and labels, add second y axis with %
-    ## 11b) Number of peaks per mz - histogram
-    hist(peakspermz, main="", las=1, ylab="")
+    ## Number of peaks per mz - histogram
+    hist(peakspermz, main="", las=1, ylab="", xlab="")
     title(ylab = "Frequency", line=4)
-    title(main="11b) Number of peaks per mz", xlab = "Number of peaks per mz", line=2)
+    title(main="Number of peaks per mz", xlab = "Number of peaks per mz", line=2)
     abline(v=median(peakspermz), col="blue") 
 
 
@@ -491,12 +491,12 @@
 
     par(mfrow = c(2,1), mar=c(5,6,4,2))
     # 12a) sum of intensities per mz - scatterplot
-    plot_colorByDensity(mz(msidata),mzTIC,  main= "12a) Sum of all peak intensities for each mz", ylab ="")
+    plot_colorByDensity(mz(msidata),mzTIC,  main= "Sum of intensities per mz", ylab ="")
     title(xlab="mz in Dalton", line=2.5)
     title(ylab="Intensity sum", line=4)
     # 12b) sum of intensities per mz - histogram
     hist(log(mzTIC), main="", xlab = "", las=1, ylab="")
-    title(main="12b) Sum of intensities per mz", line=2, ylab="")
+    title(main="Sum of intensities per mz", line=2, ylab="")
     title(xlab = "log (sum of intensities per mz)")
     title(ylab = "Frequency", line=4)
     abline(v=median(log(mzTIC[mzTIC>0])), col="blue")
@@ -510,23 +510,28 @@
 
     ## 13a) Intensity histogram: 
     hist(log2(spectra(msidata)[]), main="", xlab = "", ylab="", las=1)
-    title(main="13a) Log2-transformed intensities", line=2)
+    title(main="Log2-transformed intensities", line=2)
     title(xlab="log2 intensities")
     title(ylab="Frequency", line=4)
     abline(v=median(log2(spectra(msidata)[(spectra(msidata)>0)])), col="blue")
 
     ## 13b) Median intensity over spectra
     medianint_spectra = apply(spectra(msidata), 2, median)
-    plot(medianint_spectra, main="13b) Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="")
+    plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index \n (= Acquisition time)", ylab="")
     title(ylab="Median spectrum intensity", line=4)
 
+    ## 13c) Histogram on mz values
+    par(mfrow = c(1, 1))
+    hist(mz(msidata), xlab = "mz in Dalton", main="Histogram of mz values")
+
+
     ## 14) Mass spectra 
 
     par(mfrow = c(2, 2))
     plot(msidata, pixel = 1:length(pixelnumber), main= "Average spectrum")
     plot(msidata, pixel =round(length(pixelnumber)/2, digits=0), main="Spectrum in middle of acquisition")
-    plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,])))
-    plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,])))
+    plot(msidata, pixel = highestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,1:2])))
+    plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,1:2])))
 
     ## 15) Zoomed in mass spectra for calibrants
     plusminusvalue = $plusminus_dalton
@@ -543,9 +548,9 @@
           abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
           plot(msidata[minmasspixel:maxmasspixel,], pixel =round(length(pixelnumber)/2, digits=0), main="pixel in middle of acquisition")
           abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
-          plot(msidata[minmasspixel:maxmasspixel,], pixel = highestmz_pixel,main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,])))
+          plot(msidata[minmasspixel:maxmasspixel,], pixel = highestmz_pixel,main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,1:2])))
           abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
-          plot(msidata[minmasspixel:maxmasspixel,], pixel = secondhighestmz_pixel,  main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,])))
+          plot(msidata[minmasspixel:maxmasspixel,], pixel = secondhighestmz_pixel,  main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,1:2])))
           abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3))
           title(paste0(inputcalibrants[x,1]), outer=TRUE)
           x=x+1
@@ -553,7 +558,37 @@
 
     }else{print("15) The inputcalibrant masses were not provided or outside the mass range")}
 
+    ## 16) ppm accuracy measured vs. theoretical calibrant mass
+
+    if (length(inputcalibrantmasses) != 0)
+    {
+        par(mfrow = c(1, 1))
+
+        differencevector = vector()
+
+        for (mass in 1:length(inputcalibrantmasses))
+        {mznumber = features(msidata, mz = inputcalibrantmasses[mass]) ### this gives the featurenumber which is closest to given mz
+        mzvalue = mz(msidata)[mznumber] ### gives the mz in Da which is closest to the given mz (using the featurenumber)
+        mzdifference = inputcalibrantmasses[mass] - mzvalue ### difference in Da: theoretical value - closest mz value
+        ppmdifference = mzdifference/inputcalibrantmasses[mass]*1000000 ### calculate ppm for accuracy measurement
+        differencevector[mass] = ppmdifference }
+        differencevector = round(differencevector, digits=2)
+
+        ### plot the ppm difference theor. mz value to closest mz value: 
+
+        calibrant_names = as.character(calibrant_list[,2])
+        diff_df = data.frame(differencevector, calibrant_names)        
+        diff_plot<-ggplot(data=diff_df, aes(x=calibrant_names, y=differencevector)) + geom_col() + theme_minimal() +
+        labs(title="Theoretical calibrant mz vs. closest measured mz", x="calibrants", y = "Difference in ppm")+
+        geom_text(aes(label=differencevector), vjust=-0.3, size=3.5, col="blue")
+
+        print(diff_plot)
+
+    }else{print("16) The inputcalibrant masses were not provided or outside the mass range")}
+
+
 dev.off()
+
 }else{
   print("inputfile has no intensities > 0")
 dev.off()
@@ -579,7 +614,7 @@
         </repeat>
     </inputs>
     <outputs>
-        <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label = "${tool.name} on $infile.display_name"/>
+        <data format="pdf" name="plots" from_work_dir="qualitycontrol.pdf" label = "${tool.name} ${on_string}"/>
     </outputs>
 
     <tests>
Binary file test-data/LM8_file16output.pdf has changed
Binary file test-data/Testfile_qualitycontrol_analyze75.pdf has changed
Binary file test-data/Testfile_qualitycontrol_imzml.pdf has changed
Binary file test-data/Testfile_qualitycontrol_rdata.pdf has changed