Mercurial > repos > galaxyp > msi_qualitycontrol
changeset 4:f44e887bd5b7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/msi_qualitycontrol commit edbf2a6cb50fb04d0db56a7557a64e3bb7a0806a
| author | galaxyp | 
|---|---|
| date | Thu, 01 Mar 2018 08:24:22 -0500 | 
| parents | 62cd51699137 | 
| children | ae98f109ba68 | 
| files | msi_qualitycontrol.xml test-data/LM8_file16.rdata test-data/LM8_file16output.pdf test-data/Testfile_qualitycontrol_analyze75.pdf test-data/Testfile_qualitycontrol_imzml.pdf test-data/Testfile_qualitycontrol_rdata.pdf | 
| diffstat | 5 files changed, 91 insertions(+), 85 deletions(-) [+] | 
line wrap: on
 line diff
--- a/msi_qualitycontrol.xml Sat Feb 24 12:48:06 2018 -0500 +++ b/msi_qualitycontrol.xml Thu Mar 01 08:24:22 2018 -0500 @@ -1,4 +1,4 @@ -<tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.7.0.2"> +<tool id="mass_spectrometry_imaging_qc" name="MSI Qualitycontrol" version="1.7.0.3"> <description> mass spectrometry imaging QC </description> @@ -34,41 +34,17 @@ library(gridExtra) library(KernSmooth) -## Read MALDI Imagind dataset +## Read MALDI Imaging dataset #if $infile.ext == 'imzml' - msidata <- readMSIData('infile.imzML') + msidata = readMSIData('infile.imzML') #elif $infile.ext == 'analyze75' - msidata <- readMSIData('infile.hdr') + msidata = readMSIData('infile.hdr') #else load('infile.RData') #end if -#if $inputpeptidefile: - ### Read tabular file with peptide masses for plots and heatmap images: - input_list = read.delim("$inputpeptidefile", header = FALSE, na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE) - if (ncol(input_list) == 1) - { - input_list = cbind(input_list, input_list) - } -#else - input_list = data.frame(0, 0) -#end if -colnames(input_list)[1:2] = c("mz", "name") - -#if $inputcalibrants: - ### Read tabular file with calibrant masses: - calibrant_list = read.delim("$inputcalibrants", header = FALSE, na.strings=c("","NA", "#NUM!", "#ZAHL!"), stringsAsFactors = FALSE) - if (ncol(calibrant_list) == 1) - { - calibrant_list = cbind(calibrant_list, calibrant_list) - } -#else - calibrant_list = data.frame(0,0) -#end if - -colnames(calibrant_list)[1:2] = c("mz", "name") ###################################### file properties in numbers ###################### @@ -131,11 +107,47 @@ peakpickinginfo=processinginfo@peakPicking } -### calculate how many input peptide masses are valid: -inputpeptides = input_list[input_list[,1]>minmz & input_list[,1]<maxmz,] +### Read tabular file with peptide masses for plots and heatmap images: + +#if $peptide_file: + + input_list = read.delim("$peptide_file", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE) + if (ncol(input_list) == 1) + { + input_list = cbind(input_list, input_list) + } + + ### calculate how many input peptide masses are valid: + inputpeptides = input_list[input_list[,1]>minmz & input_list[,1]<maxmz,] + number_peptides_in = length(input_list[,1]) + number_peptides_valid = length(inputpeptides) + +#else + inputpeptides = data.frame(0,0) + number_peptides_in = 0 + number_peptides_valid = 0 +#end if -### calculate how many input calibrant masses are valid: -inputcalibrants = calibrant_list[calibrant_list[,1]>minmz & calibrant_list[,1]<maxmz,] +colnames(inputpeptides) = c("mz", "name") + +#if $calibrant_file: + ### Read tabular file with calibrant masses: + calibrant_list = read.delim("$calibrant_file", header = FALSE, na.strings=c("","NA"), stringsAsFactors = FALSE) + if (ncol(calibrant_list) == 1) + { + calibrant_list = cbind(calibrant_list, calibrant_list) + } + ### calculate how many input calibrant masses are valid: + inputcalibrants = calibrant_list[calibrant_list[,1]>minmz & calibrant_list[,1]<maxmz,] + number_calibrants_in = length(calibrant_list[,1]) + number_calibrants_valid = length(inputcalibrants) +#else + inputcalibrants = data.frame(0,0) + number_calibrants_in = 0 + number_calibrants_valid = 0 +#end if + +colnames(inputcalibrants) = c("mz", "name") ### bind inputcalibrants and inputpeptides together, to make heatmap on both lists @@ -159,7 +171,8 @@ "Baseline reduction", "Peak picking", "Centroided", - paste0("# valid masses in ", "$filename")) + paste0("# peptides in ", "$peptide_file.display_name"), + paste0("# calibrants in ", "$calibrant_file.display_name")) values = c(paste0(maxfeatures), paste0(minmz, " - ", maxmz), @@ -176,7 +189,8 @@ paste0(baselinereductioninfo), paste0(peakpickinginfo), paste0(centroidedinfo), - paste0(length(inputmasses), "/", length(calibrant_list[,1])+length(input_list[,1]))) + paste0(number_peptides_valid, " / " , number_peptides_in), + paste0(number_calibrants_valid, " / ", number_calibrants_in)) property_df = data.frame(properties, values) @@ -207,24 +221,17 @@ xlim=c(min(x1),max(x1)), xlab="",ylab="",main=""){ - df <- data.frame(x1,x2) - x <- densCols(x1,x2, colramp=colorRampPalette(c("black", "white"))) - df\$dens <- col2rgb(x)[1,] + 1L - cols <- colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256) - df\$col <- cols[df\$dens] + df = data.frame(x1,x2) + x = densCols(x1,x2, colramp=colorRampPalette(c("black", "white"))) + df\$dens = col2rgb(x)[1,] + 1L + cols = colorRampPalette(c("#000099", "#00FEFF", "#45FE4F","#FCFF00", "#FF9400", "#FF3100"))(256) + df\$col = cols[df\$dens] plot(x2~x1, data=df[order(df\$dens),], ylim=ylim,xlim=xlim,pch=20,col=col, cex=1,xlab=xlab,ylab=ylab,las=1, main=main) } - ## Variables for plots - xrange = 1 - yrange = 1 - maxx = max(coord(msidata)[,1])+xrange - minx = min(coord(msidata)[,1])-xrange - maxy = max(coord(msidata)[,2])+yrange - miny = min(coord(msidata)[,2])-yrange ############################################################################ @@ -249,8 +256,8 @@ { for (calibrantnr in 1:length(inputcalibrantmasses)) { calibrantmz = inputcalibrantmasses[calibrantnr] - calibrantfeaturemin = features(msidata, mz=calibrantmz-$plusminusinDalton) - calibrantfeaturemax = features(msidata, mz=calibrantmz+$plusminusinDalton) + calibrantfeaturemin = features(msidata, mz=calibrantmz-$plusminus_dalton) + calibrantfeaturemax = features(msidata, mz=calibrantmz+$plusminus_dalton) if (calibrantfeaturemin == calibrantfeaturemax) { @@ -267,7 +274,7 @@ countvector= as.factor(colSums(pixelmatrix>0)) countdf= cbind(coord(msidata), countvector) - mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrap1", "lightseagreen") + mycolours = c("black","grey", "darkblue", "blue", "green" , "red", "yellow", "magenta", "olivedrab1", "lightseagreen") print(ggplot(countdf, aes(x=x, y=y, fill=countvector)) + geom_tile() + coord_fixed() @@ -276,7 +283,7 @@ + theme(text=element_text(family="ArialMT", face="bold", size=12)) + scale_fill_manual(values = mycolours[1:length(countvector)], na.value = "black", name = "# calibrants")) - }else{print("2) The inputcalibrant masses were outside the mass range")} + }else{print("2) The inputcalibrant masses were not provided or outside the mass range")} ############# new 2b) image of foldchanges (log2 intensity ratios) between two masses in the same spectrum @@ -306,25 +313,24 @@ mzdown2 = features(msidata, mz = mass2-2) mzup2 = features(msidata, mz = mass2+3) - ### plot the part which was chosen, with chosen value in blue, distance in blue, maxmass in red, xlim fixed to 5 Da window + ### find mass in the given range with the highest intensity (will be plotted in red) if (mzrowdown1 == mzrowup1) { - maxmassrow1 = spectra(msidata)[mzrowup1,] - maxmass1 = mz(msidata)[mzrowup1][which.max(maxmassrow1)] - }else{ + maxmass1 = mz(msidata)[ mzrowdown1] + }else{ ### for all masses in the massrange calculate mean intensity over all pixels and take mass which has highest mean maxmassrow1 = rowMeans(spectra(msidata)[mzrowdown1:mzrowup1,]) - maxmass1 = mz(msidata)[mzrowdown1:mzrowup1][which.max(maxmassrow1)] + maxmass1 = mz(msidata)[mzrowdown1:mzrowup1][which.max(maxmassrow1)] } if (mzrowdown2 == mzrowup2) { - maxmassrow2 = spectra(msidata)[mzrowup2,] - maxmass2 = mz(msidata)[mzrowup2][which.max(maxmassrow2)] + maxmass2 = mz(msidata)[mzrowup2] }else{ maxmassrow2 = rowMeans(spectra(msidata)[mzrowdown2:mzrowup2,]) maxmass2 = mz(msidata)[mzrowdown2:mzrowup2][which.max(maxmassrow2)] } + ### plot the part which was chosen, with chosen value in blue, distance in blue, maxmass in red, xlim fixed to 5 Da window par(mfrow=c(2,1), oma=c(0,0,2,0)) plot(msidata[mzdown1:mzup1,], pixel = 1:pixelcount, main=paste0("average spectrum ", mass1, " Da")) abline(v=c(mass1-distance, mass1, mass1+distance), col="blue",lty=c(3,5,3)) @@ -339,7 +345,6 @@ mass1vector = spectra(msidata)[features(msidata, mz = maxmass1),] mass2vector = spectra(msidata)[features(msidata, mz = maxmass2),] - foldchange = log2(mass1vector/mass2vector) ratiomatrix = cbind(foldchange, coord(msidata)) @@ -360,9 +365,9 @@ if (length(inputmasses) != 0) { for (mass in 1:length(inputmasses)) { - image(msidata, mz=inputmasses[mass], plusminus=$plusminusinDalton, - main= paste0("3", LETTERS[mass], ") ", inputnames[mass], " (", round(inputmasses[mass], digits = 2)," ± ", $plusminusinDalton, " Da)"), - contrast.enhance = "histogram", ylim=c(maxy+1, 0)) + image(msidata, mz=inputmasses[mass], plusminus=$plusminus_dalton, + main= paste0("3", LETTERS[mass], ") ", inputnames[mass], " (", round(inputmasses[mass], digits = 2)," ± ", $plusminus_dalton, " Da)"), + contrast.enhance = "histogram", ylim=c(maximumy+2, 0)) } } else {print("3) The inputpeptide masses were outside the mass range")} @@ -381,7 +386,7 @@ ## 5) TIC image TICcoordarray=cbind(coord(msidata), TICs) - colo <- colorRampPalette( + colo = colorRampPalette( c("blue", "cyan", "green", "yellow","red")) print(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo) + geom_tile() + coord_fixed() @@ -414,10 +419,10 @@ secondhighestmz_pixel = which(round(highestmz_matrix\$highestmzinDa, digits=0) == secondhighestmz)[1] ## 7) pca image for two components - pca <- PCA(msidata, ncomp=2) + pca = PCA(msidata, ncomp=2) par(mfrow = c(2,1)) plot(pca, col=c("black", "darkgrey"), main="7) PCA for two components") - image(pca, col=c("black", "white"),ylim=c(maxy+1, 0)) + image(pca, col=c("black", "white"),ylim=c(maximumy+2, 0)) ############################# III) properties over acquisition (spectra index)########## @@ -436,7 +441,7 @@ title(ylab="Frequency = # spectra", line=4) abline(v=median(peaksperpixel), col="blue") - ## 9a) TIC per spectrum - density scatterplot + ## 9a) TIC per spectrum - density scatterplot zero=0 par(mfrow = c(2,1), mar=c(5,6,4,2)) plot_colorByDensity(pixels(msidata), TICs, ylab = "", xlab = "", main="9a) TIC per pixel") @@ -533,7 +538,7 @@ plot(msidata, pixel = secondhighestmz_pixel, main= paste0("Spectrum at ", rownames(coord(msidata)[secondhighestmz_pixel,]))) ## 15) Zoomed in mass spectra for calibrants - plusminusvalue = $plusminusinDalton + plusminusvalue = $plusminus_dalton x = 1 if (length(inputcalibrantmasses) != 0) { @@ -545,7 +550,7 @@ par(mfrow = c(2, 2), oma=c(0,0,2,0)) plot(msidata[minmasspixel:maxmasspixel,], pixel = 1:length(pixelnumber), main= "average spectrum") abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3)) - plot(msidata[minmasspixel:maxmasspixel,], pixel =round(pixelnumber/2, digits=0), main="pixel in middle of acquisition") + plot(msidata[minmasspixel:maxmasspixel,], pixel =round(length(pixelnumber)/2, digits=0), main="pixel in middle of acquisition") abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3)) plot(msidata[minmasspixel:maxmasspixel,], pixel = highestmz_pixel,main= paste0("Spectrum at ", rownames(coord(msidata)[highestmz_pixel,]))) abline(v=c(calibrant-plusminusvalue, calibrant,calibrant+plusminusvalue), col="blue", lty=c(3,5,3)) @@ -569,12 +574,12 @@ <param name="infile" type="data" format="imzml,rdata,analyze75" label="Inputfile as imzML, Analyze7.5 or Cardinal MSImageSet saved as RData" help="Upload composite datatype imzml (ibd+imzML) or analyze75 (hdr+img+t2m) or regular upload .RData (Cardinal MSImageSet)"/> <param name="filename" type="text" value="" optional="true" label="Title" help="will appear in the quality report. If nothing given it will take the dataset name."/> - <param name="inputpeptidefile" type="data" optional="true" format="txt, csv" label="Text file with peptidemasses and names" + <param name="peptide_file" type="data" optional="true" format="tabular" label="Text file with peptidemasses and names" help="first column peptide m/z, second column peptide name, tab separated file"/> - <param name="inputcalibrants" type="data" optional="true" format="txt,csv" + <param name="calibrant_file" type="data" optional="true" format="tabular" label="Internal calibrants" help="Used for plot number of calibrant per spectrum and for zoomed in mass spectra"/> - <param name="plusminusinDalton" value="0.25" type="text" label="Mass range" help="plusminus mass window in Dalton"/> + <param name="plusminus_dalton" value="0.25" type="text" label="Mass range" help="plusminus mass window in Dalton"/> <repeat name="calibrantratio" title="Plot fold change of two masses for each spectrum" min="0" max="10"> <param name="mass1" value="1111" type="float" label="Mass 1" help="First mass in Dalton"/> <param name="mass2" value="2222" type="float" label="Mass 2" help="Second mass in Dalton"/> @@ -592,14 +597,15 @@ <composite_data value="Example_Continuous.imzML" /> <composite_data value="Example_Continuous.ibd" /> </param> - <param name="inputpeptidefile" value="inputpeptides.csv" ftype="csv"/> - <param name="inputcalibrants" ftype="txt" value="inputcalibrantfile1.txt"/> - <param name="plusminusinDalton" value="0.25"/> + <param name="peptide_file" value="inputpeptides.csv" ftype="csv"/> + <param name="calibrant_file" ftype="txt" value="inputcalibrantfile1.txt"/> + <param name="plusminus_dalton" value="0.25"/> <param name="filename" value="Testfile_imzml"/> <repeat name="calibrantratio"> <param name="mass1" value="111"/> <param name="mass2" value="222"/> <param name="distance" value="0.25"/> + <param name="filenameratioplot" value = "Ratio of mass1 (111) / mass2 (222)"/> </repeat> <output name="plots" file="Testfile_qualitycontrol_imzml.pdf" compare="sim_size" delta="20000"/> </test> @@ -610,34 +616,34 @@ <composite_data value="Analyze75.img"/> <composite_data value="Analyze75.t2m"/> </param> - <param name="inputpeptidefile" value="inputpeptides.txt" ftype="txt"/> - <param name="inputcalibrants" ftype="txt" value="inputcalibrantfile2.txt"/> - <param name="plusminusinDalton" value="0.5"/> + <param name="peptide_file" value="inputpeptides.txt" ftype="txt"/> + <param name="calibrant_file" ftype="txt" value="inputcalibrantfile2.txt"/> + <param name="plusminus_dalton" value="0.5"/> <param name="filename" value="Testfile_analyze75"/> <output name="plots" file="Testfile_qualitycontrol_analyze75.pdf" compare="sim_size" delta="20000"/> </test> <test> <param name="infile" value="preprocessing_results1.RData" ftype="rdata"/> - <param name="inputpeptidefile" value="inputpeptides.csv" ftype="txt"/> - <param name="inputcalibrants" ftype="txt" value="inputcalibrantfile1.txt"/> - <param name="plusminusinDalton" value="0.1"/> + <param name="peptide_file" value="inputpeptides.csv" ftype="txt"/> + <param name="calibrant_file" ftype="txt" value="inputcalibrantfile1.txt"/> + <param name="plusminus_dalton" value="0.1"/> <param name="filename" value="Testfile_rdata"/> <output name="plots" file="Testfile_qualitycontrol_rdata.pdf" compare="sim_size" delta="20000"/> </test> <test> <param name="infile" value="LM8_file16.rdata" ftype="rdata"/> - <param name="inputpeptidefile" value="inputpeptides.txt" ftype="txt"/> - <param name="inputcalibrants" ftype="txt" value="inputcalibrantfile2.txt"/> - <param name="plusminusinDalton" value="0.1"/> + <param name="peptide_file" value="inputpeptides.txt" ftype="txt"/> + <param name="calibrant_file" ftype="txt" value="inputcalibrantfile2.txt"/> + <param name="plusminus_dalton" value="0.1"/> <param name="filename" value="Testfile_rdata"/> <output name="plots" file="LM8_file16output.pdf" compare="sim_size" delta="20000"/> </test> </tests> <help> <![CDATA[ -Quality control for maldi imaging mass spectrometry data. - +Quality control for maldi imaging mass spectrometry data. The output of this tool contains key values and plots of the imaging data as pdf. +For additional beautiful heatmap images use the MSI ion images tool and to plot more mass spectra use the MSI massspectra tool. Input data: 3 types of input data can be used: @@ -645,7 +651,7 @@ - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) -The output of this tool contains key values and plots of the imaging data as pdf. + ]]> </help>
