changeset 9:060440cf66f3 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit d008f6ea0f5c8435fb975a34cb99ea4d42c5ebd2"
author galaxyp
date Wed, 13 May 2020 17:55:17 +0000
parents 5f13a2f850c6
children e4dda61bb5c8
files macros.xml quality_report.xml test-data/QC_analyze75.pdf test-data/QC_empty_spectra.pdf test-data/QC_imzml.pdf test-data/QC_rdata.pdf test-data/preprocessing_results1.ibd test-data/preprocessing_results1.imzml test-data/preprocessing_results1.imzml.txt test-data/preprocessing_results1.pdf test-data/preprocessing_results2.ibd test-data/preprocessing_results2.imzml test-data/preprocessing_results2.imzml.txt test-data/preprocessing_results2.pdf test-data/preprocessing_results3.ibd test-data/preprocessing_results3.imzml test-data/preprocessing_results3.imzml.txt test-data/preprocessing_results3.pdf test-data/preprocessing_results4.ibd test-data/preprocessing_results4.imzml test-data/preprocessing_results4.imzml.txt test-data/preprocessing_results4.pdf test-data/preprocessing_results5.ibd test-data/preprocessing_results5.imzml test-data/preprocessing_results5.imzml.txt test-data/preprocessing_results5.pdf
diffstat 26 files changed, 61 insertions(+), 51 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Thu Apr 23 12:01:59 2020 +0000
+++ b/macros.xml	Wed May 13 17:55:17 2020 +0000
@@ -32,6 +32,12 @@
     <token name="@READING_MSIDATA@"><![CDATA[
         ## importing MSI data files
 
+            ## read RData files (MSI and other data) independent of filename
+            loadRData <- function(fileName){
+            load(fileName)
+            get(ls()[ls() != "fileName"])
+            }
+
         #if $infile.ext == 'imzml'
             #if str($processed_cond.processed_file) == "processed":
                 msidata <- readImzML('infile', resolution=$processed_cond.accuracy, attach.only=TRUE, units = "$processed_cond.units")
@@ -45,11 +51,6 @@
             msidata = readAnalyze('infile', attach.only=TRUE)
             centroided(msidata) = $centroids
         #else
-            ## function to read RData files independent of filename
-            loadRData <- function(fileName){
-            load(fileName)
-            get(ls()[ls() != "fileName"])
-            }
             msidata = loadRData('infile.RData')
         #end if
 
--- a/quality_report.xml	Thu Apr 23 12:01:59 2020 +0000
+++ b/quality_report.xml	Wed May 13 17:55:17 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.0">
+<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.1">
     <description>
         mass spectrometry imaging QC
     </description>
@@ -40,7 +40,7 @@
     msidata = as(msidata, "MSImagingExperiment")
     run(msidata) = "infile"
     }
-print(class(msidata))
+
 ## remove duplicated coordinates
 msidata <- msidata[,!duplicated(coord(msidata))]
 
@@ -74,7 +74,7 @@
 ## Percentage of intensities > 0
 percpeaks = round(npeaks/numpeaks*100, digits=2)
 ## Number of empty TICs
-TICs = colSums(spectra(msidata), na.rm=TRUE) 
+TICs = pixelApply(msidata, sum)
 NumemptyTIC = sum(TICs == 0)
 ## Median und sd TIC
 medTIC = round(median(TICs), digits=1)
@@ -329,8 +329,8 @@
             ### plot spatial distribution of fold change
 
             ## calculate mean intensity for each m/z over the ppm range; then calculate log2 foldchange
-            mass1vector = colMeans(spectra(filtered_data1), na.rm =TRUE)
-            mass2vector = colMeans(spectra(filtered_data2), na.rm = TRUE)
+            mass1vector = pixelApply(filtered_data1, mean, na.rm =TRUE)
+            mass2vector = pixelApply(filtered_data2, mean, na.rm = TRUE)
             foldchange= log2(mass1vector/mass2vector)
             fcmatrix = data.frame(coord(msidata)\$x, coord(msidata)\$y,foldchange)
             colnames(fcmatrix) = c("x", "y", "foldchange")
@@ -610,7 +610,7 @@
     ########################## 13) Sum of intensities per m/z ##################
 
     ## Sum of all intensities for each m/z (like TIC, but for m/z instead of pixel)
-    mzTIC = rowSums(spectra(msidata), na.rm=TRUE) ## calculate intensity sum for each m/z
+    mzTIC = featureApply(msidata, sum, na.rm=TRUE) ## calculate intensity sum for each m/z
 
     par(mfrow = c(2,1), mar=c(5,6,4,2))
     ## 13a) scatterplot
@@ -633,7 +633,7 @@
     par(mfrow = c(2,1), mar=c(5,6,4,2))
 
     ## 14a) Median intensity over spectra
-    medianint_spectra = apply(spectra(msidata), 2, median, na.rm=TRUE) 
+    medianint_spectra = pixelApply(msidata, median)
     plot(medianint_spectra, main="Median intensity per spectrum",las=1, xlab="Spectra index", ylab="")
     title(ylab="Median spectrum intensity", line=4)
     if (!is.null(levels(msidata\$annotation))){
@@ -682,22 +682,30 @@
         boxplot(log10(mean_matrix), ylab = "Log10 mean intensity per m/z", main="Log10 mean m/z intensities per annotation group", xaxt = "n")
         (axis(1, at = c(1:number_combined), labels=levels(msidata\$annotation), las=2))
 
-        ## 14e) Heatmap of pearson correlation on mean intensities between annotation groups
+        ## 14e) Heatmap of mean intensities of annotation groups
 
-        corr_matrix = mean_matrix
-        corr_matrix[corr_matrix == 0] <- NA
-        colnames(corr_matrix) = levels(msidata\$annotation)
+        colnames(mean_matrix) = levels(msidata\$annotation)
+        mean_matrix[is.na(mean_matrix)] = 0
+            heatmap.parameters <- list(mean_matrix, 
+            show_rownames = T, show_colnames = T,
+            main = "Heatmap of mean intensities per annotation group")
+            par(oma=c(3,0,0,0))
+            print(heatmap(mean_matrix),margins = c(10, 10))
+
 
-        ## pearson correlation is only possible if there are at least 2 groups
-        if (length(colnames)>1)
-        {
-            corr_matrix = cor(corr_matrix, method= "pearson",use="complete.obs")
+        ## 14f) PCA of mean intensities of annotation groups
 
-            heatmap.parameters <- list(corr_matrix, 
-            show_rownames = T, show_colnames = T,
-            main = "Pearson correlation on mean intensities")
-            do.call("pheatmap", heatmap.parameters)
-        }
+        ## define annotation by colour
+        annotation_colour = rainbow(length(levels(msidata\$annotation)))[as.factor(levels(msidata\$annotation))]
+        ## transform and scale dataframe
+        pca = prcomp(t(mean_matrix),center=FALSE,scale.=FALSE)
+        ## plot single plot
+        plot(pca\$x[,c(1,2)],col=annotation_colour,pch=19)
+        ## plot pca with colours for max first 5 PCs
+        pc_comp = ifelse(ncol(pca\$x)<5 , ncol(pca\$x), 5)
+        pairs(pca\$x[,1:pc_comp],col=annotation_colour,pch=19)
+        legend("bottom", horiz = TRUE, legend=levels(msidata\$annotation), col=rainbow(length(levels(msidata\$annotation))), pch=19)
+
     }
 
     ################################## VI) Mass spectra and m/z accuracy ########################
@@ -710,13 +718,13 @@
     msidata_no_NA = msidata
 
     ## find three equal m/z ranges for the average mass spectra plots: 
-    third_mz_range = nrow(msidata_no_NA)/3
+    third_mz_range = round(nrow(msidata_no_NA)/3,0)
 
     par(cex.axis=1, cex.lab=1, mar=c(5.1,4.1,4.1,2.1))
     print(plot(msidata_no_NA, run="infile", layout=c(2,2), strip=FALSE, main= "Average spectrum"))
-    print(plot(msidata_no_NA[1:third_mz_range,], run="infile", layout=FALSE, strip=FALSE, main= "Zoomed average spectrum"))
-    print(plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], run="infile", layout=FALSE, strip=FALSE, main= "Zoomed average spectrum"))
-    print(plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], run="infile", layout=FALSE, strip=FALSE, main= "Zoomed average spectrum"))
+    print(plot(msidata_no_NA[1:third_mz_range,], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum"))
+    print(plot(msidata_no_NA[third_mz_range:(2*third_mz_range),], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum"))
+    print(plot(msidata_no_NA[(2*third_mz_range):nrow(msidata_no_NA),], layout=FALSE, run="infile", strip=FALSE, main="Zoomed average spectrum"))
 
     ## plot one average mass spectrum for each pixel annotation group
 
@@ -762,7 +770,7 @@
             filtered_data = msidata_no_NA[mz(msidata_no_NA) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata_no_NA) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
 
             if (nrow(filtered_data) > 0 & sum(spectra(filtered_data)) > 0){
-                maxmassrow = rowMeans(spectra(filtered_data)) ## for each m/z average intensity is calculated
+                maxmassrow = featureApply(filtered_data, mean) ## for each m/z average intensity is calculated
                 maxvalue = mz(filtered_data)[which.max(maxmassrow)] ### m/z with highest average intensity in m/z range
                 mzdifference = maxvalue - inputcalibrantmasses[mass] ### difference: theoretical value - closest m/z value
             ppmdifference = mzdifference/inputcalibrantmasses[mass]*1000000 ### calculate ppm for accuracy measurement
@@ -1115,7 +1123,8 @@
 - Histogram of intensities. 
 - (annot) Intensities per annotation group: Same histogram as before but with colours to show the contribution of each pixel annotation group. 
 - (annot) Log10 mean intensities per m/z and annotation group: For all pixels of an annotation group the log10 mean intensity for each m/z is calculated and shown as boxplot. 
-- (annot) Pearson correlation between annotation groups (needs at least 2 groups) based on mean intensities and shown as heatmap.
+- (annot) Heatmap of mean intensity per m/z
+- (annot) PCA of mean intensity per m/z
 
 **Mass spectra and m/z accuracy**
 
Binary file test-data/QC_analyze75.pdf has changed
Binary file test-data/QC_empty_spectra.pdf has changed
Binary file test-data/QC_imzml.pdf has changed
Binary file test-data/QC_rdata.pdf has changed
Binary file test-data/preprocessing_results1.ibd has changed
--- a/test-data/preprocessing_results1.imzml	Thu Apr 23 12:01:59 2020 +0000
+++ b/test-data/preprocessing_results1.imzml	Wed May 13 17:55:17 2020 +0000
@@ -9,8 +9,8 @@
 		<fileContent>
 			<cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value="" />
 			<cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value="" />
-			<cvParam cvRef="IMS" accession="IMS:1000080" name="universally unique identifier" value="722d404b-4188-40b4-a3b2-3764ebbdecad" />
-			<cvParam cvRef="IMS" accession="IMS:1000091" name="ibd SHA-1" value="c5aded0bab57fd7317901ff7709f2395fc91152e" />
+			<cvParam cvRef="IMS" accession="IMS:1000080" name="universally unique identifier" value="25fb4349-d529-4b12-8523-a6aa0306ecf4" />
+			<cvParam cvRef="IMS" accession="IMS:1000091" name="ibd SHA-1" value="7f41f9f5efcb31248caefc91b7d6edce87d7f42c" />
 			<cvParam cvRef="IMS" accession="IMS:1000030" name="continuous" value="" />
 		</fileContent>
 	</fileDescription>
--- a/test-data/preprocessing_results1.imzml.txt	Thu Apr 23 12:01:59 2020 +0000
+++ b/test-data/preprocessing_results1.imzml.txt	Wed May 13 17:55:17 2020 +0000
@@ -1,4 +1,4 @@
 imzML file:
 total 24
--rw-r--r-- 1 meli meli   216 Mär 24 10:39 ibd
--rw-r--r-- 1 meli meli 18090 Mär 24 10:39 imzml
+-rw-r--r-- 1 meli meli   216 Mai 10 17:37 ibd
+-rw-r--r-- 1 meli meli 18090 Mai 10 17:37 imzml
Binary file test-data/preprocessing_results1.pdf has changed
Binary file test-data/preprocessing_results2.ibd has changed
--- a/test-data/preprocessing_results2.imzml	Thu Apr 23 12:01:59 2020 +0000
+++ b/test-data/preprocessing_results2.imzml	Wed May 13 17:55:17 2020 +0000
@@ -9,8 +9,8 @@
 		<fileContent>
 			<cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value="" />
 			<cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value="" />
-			<cvParam cvRef="IMS" accession="IMS:1000080" name="universally unique identifier" value="e536d1e2-427b-4c5b-9743-8210ae52a564" />
-			<cvParam cvRef="IMS" accession="IMS:1000091" name="ibd SHA-1" value="7cea783e9d345946b874fc1f25d40ce90bffd2f9" />
+			<cvParam cvRef="IMS" accession="IMS:1000080" name="universally unique identifier" value="fa59b434-12e0-47fe-b338-814c92daa7d6" />
+			<cvParam cvRef="IMS" accession="IMS:1000091" name="ibd SHA-1" value="5764be27b51a23d8c23e4c93befb9d8e57161bd9" />
 			<cvParam cvRef="IMS" accession="IMS:1000030" name="continuous" value="" />
 		</fileContent>
 	</fileDescription>
--- a/test-data/preprocessing_results2.imzml.txt	Thu Apr 23 12:01:59 2020 +0000
+++ b/test-data/preprocessing_results2.imzml.txt	Wed May 13 17:55:17 2020 +0000
@@ -1,4 +1,4 @@
 imzML file:
 total 64
--rw-r--r-- 1 meli meli 37404 Mär 24 10:40 ibd
--rw-r--r-- 1 meli meli 22796 Mär 24 10:40 imzml
+-rw-r--r-- 1 meli meli 37404 Mai 10 17:38 ibd
+-rw-r--r-- 1 meli meli 22796 Mai 10 17:38 imzml
Binary file test-data/preprocessing_results2.pdf has changed
Binary file test-data/preprocessing_results3.ibd has changed
--- a/test-data/preprocessing_results3.imzml	Thu Apr 23 12:01:59 2020 +0000
+++ b/test-data/preprocessing_results3.imzml	Wed May 13 17:55:17 2020 +0000
@@ -9,8 +9,8 @@
 		<fileContent>
 			<cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value="" />
 			<cvParam cvRef="MS" accession="MS:1000127" name="centroid spectrum" value="" />
-			<cvParam cvRef="IMS" accession="IMS:1000080" name="universally unique identifier" value="df9dd256-6485-4954-8338-ddb0c760d4dc" />
-			<cvParam cvRef="IMS" accession="IMS:1000091" name="ibd SHA-1" value="ea9ad44f2615ef4d24b92d3ceb38dfa8ec47a978" />
+			<cvParam cvRef="IMS" accession="IMS:1000080" name="universally unique identifier" value="d11b7693-3397-4d1f-9e67-29d952701100" />
+			<cvParam cvRef="IMS" accession="IMS:1000091" name="ibd SHA-1" value="94906879d47bbdcf97126d50c906dfd76e6681e4" />
 			<cvParam cvRef="IMS" accession="IMS:1000031" name="processed" value="" />
 		</fileContent>
 	</fileDescription>
--- a/test-data/preprocessing_results3.imzml.txt	Thu Apr 23 12:01:59 2020 +0000
+++ b/test-data/preprocessing_results3.imzml.txt	Wed May 13 17:55:17 2020 +0000
@@ -1,4 +1,4 @@
 imzML file:
 total 24
--rw-r--r-- 1 meli meli   960 Apr 22 15:36 ibd
--rw-r--r-- 1 meli meli 18112 Apr 22 15:36 imzml
+-rw-r--r-- 1 meli meli   960 Mai 10 17:39 ibd
+-rw-r--r-- 1 meli meli 18112 Mai 10 17:39 imzml
Binary file test-data/preprocessing_results3.pdf has changed
Binary file test-data/preprocessing_results4.ibd has changed
--- a/test-data/preprocessing_results4.imzml	Thu Apr 23 12:01:59 2020 +0000
+++ b/test-data/preprocessing_results4.imzml	Wed May 13 17:55:17 2020 +0000
@@ -9,8 +9,8 @@
 		<fileContent>
 			<cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value="" />
 			<cvParam cvRef="MS" accession="MS:1000128" name="profile spectrum" value="" />
-			<cvParam cvRef="IMS" accession="IMS:1000080" name="universally unique identifier" value="db79298c-8368-42d7-84fc-18cad6ef1924" />
-			<cvParam cvRef="IMS" accession="IMS:1000091" name="ibd SHA-1" value="18bae3cc87b4c9aab2577cffe2fbc7425a93270b" />
+			<cvParam cvRef="IMS" accession="IMS:1000080" name="universally unique identifier" value="c6122884-ceb5-46b6-9160-5d28e8862946" />
+			<cvParam cvRef="IMS" accession="IMS:1000091" name="ibd SHA-1" value="03e4b58b8e63517605164e90b85fe93a851a9c5b" />
 			<cvParam cvRef="IMS" accession="IMS:1000030" name="continuous" value="" />
 		</fileContent>
 	</fileDescription>
--- a/test-data/preprocessing_results4.imzml.txt	Thu Apr 23 12:01:59 2020 +0000
+++ b/test-data/preprocessing_results4.imzml.txt	Wed May 13 17:55:17 2020 +0000
@@ -1,4 +1,4 @@
 imzML file:
 total 84
--rw-r--r-- 1 meli meli 63256 Mär 24 11:35 ibd
--rw-r--r-- 1 meli meli 18199 Mär 24 11:35 imzml
+-rw-r--r-- 1 meli meli 63256 Mai 10 17:40 ibd
+-rw-r--r-- 1 meli meli 18199 Mai 10 17:40 imzml
Binary file test-data/preprocessing_results4.pdf has changed
Binary file test-data/preprocessing_results5.ibd has changed
--- a/test-data/preprocessing_results5.imzml	Thu Apr 23 12:01:59 2020 +0000
+++ b/test-data/preprocessing_results5.imzml	Wed May 13 17:55:17 2020 +0000
@@ -9,8 +9,8 @@
 		<fileContent>
 			<cvParam cvRef="MS" accession="MS:1000579" name="MS1 spectrum" value="" />
 			<cvParam cvRef="MS" accession="MS:1000128" name="profile spectrum" value="" />
-			<cvParam cvRef="IMS" accession="IMS:1000080" name="universally unique identifier" value="1cb5b7df-bc82-4588-9a14-1dd58a3983a7" />
-			<cvParam cvRef="IMS" accession="IMS:1000091" name="ibd SHA-1" value="a16bf7a9d9d4064ce94821e3e7df2681725a497b" />
+			<cvParam cvRef="IMS" accession="IMS:1000080" name="universally unique identifier" value="3aff713c-00ec-422d-b63d-efd45fbdd7cc" />
+			<cvParam cvRef="IMS" accession="IMS:1000091" name="ibd SHA-1" value="9316138c1e7484662943bb206c79c2d074121530" />
 			<cvParam cvRef="IMS" accession="IMS:1000030" name="continuous" value="" />
 		</fileContent>
 	</fileDescription>
--- a/test-data/preprocessing_results5.imzml.txt	Thu Apr 23 12:01:59 2020 +0000
+++ b/test-data/preprocessing_results5.imzml.txt	Wed May 13 17:55:17 2020 +0000
@@ -1,4 +1,4 @@
 imzML file:
 total 428
--rw-r--r-- 1 meli meli 415936 Apr 19 21:07 ibd
--rw-r--r-- 1 meli meli  18569 Apr 19 21:07 imzml
+-rw-r--r-- 1 meli meli 415936 Mai 10 17:40 ibd
+-rw-r--r-- 1 meli meli  18569 Mai 10 17:40 imzml
Binary file test-data/preprocessing_results5.pdf has changed