Mercurial > repos > galaxyp > maldi_quant_preprocessing
changeset 2:3f3812d36027 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit d2f311f7fff24e54c565127c40414de708e31b3c
| author | galaxyp | 
|---|---|
| date | Thu, 25 Oct 2018 07:23:20 -0400 | 
| parents | 18f04cc43f7e | 
| children | aa81463e21ea | 
| files | maldi_macros.xml maldi_quant_preprocessing.xml test-data/Preprocessing1_QC.pdf test-data/Preprocessing2_QC.pdf test-data/Preprocessing3_QC.pdf test-data/intensity_matrix4.tabular test-data/masspeaks4.tabular test-data/outfile1.ibd test-data/outfile1.imzML test-data/outfile2.ibd test-data/outfile2.imzML test-data/outfile3.ibd test-data/outfile3.imzML test-data/peakdetection1_QC.pdf test-data/peakdetection2_QC.pdf test-data/peakdetection3_QC.pdf test-data/peakdetection4_QC.pdf test-data/testfile_squares.rdata | 
| diffstat | 18 files changed, 2709 insertions(+), 74 deletions(-) [+] | 
line wrap: on
 line diff
--- a/maldi_macros.xml Mon Oct 01 01:03:01 2018 -0400 +++ b/maldi_macros.xml Thu Oct 25 07:23:20 2018 -0400 @@ -29,6 +29,7 @@ <xml name="citation"> <citations> <citation type="doi">10.1093/bioinformatics/bts447</citation> + <citation type="doi">10.1007/978-3-319-45809-0_6</citation> </citations> </xml> </macros>
--- a/maldi_quant_preprocessing.xml Mon Oct 01 01:03:01 2018 -0400 +++ b/maldi_quant_preprocessing.xml Thu Oct 25 07:23:20 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="maldi_quant_preprocessing" name="MALDIquant preprocessing" version="@VERSION@.1"> +<tool id="maldi_quant_preprocessing" name="MALDIquant preprocessing" version="@VERSION@.2"> <description> Preprocessing of mass-spectrometry imaging data </description> @@ -8,6 +8,7 @@ <expand macro="requirements"/> <command detect_errors="exit_code"> <![CDATA[ + cat '${maldi_quant_preprocessing}' && #if $infile.ext == 'imzml' cp '${infile.extra_files_path}/imzml' infile.imzML && cp '${infile.extra_files_path}/ibd' infile.ibd && @@ -22,6 +23,7 @@ ln -s $infile infile.RData && #end if Rscript "${maldi_quant_preprocessing}" && + mkdir $outfile_imzml.files_path && mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true && @@ -54,7 +56,7 @@ coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) #elif $infile.ext == 'analyze75' ## Import analyze7.5 file - maldi_data = import( 'infile.hdr' ) + maldi_data = importAnalyze( 'infile.hdr' ) coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) #else loadRData <- function(fileName){ @@ -142,10 +144,11 @@ pixel_number = length(maldi_data) minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) -maxfeatures = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) +mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) +number_features = length(unique(unlist(lapply(maldi_data,mass)))) medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) -inputdata = c(minmz, maxmz,maxfeatures, medint) -QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medint)) +inputdata = c(minmz, maxmz,number_features,mean_features, medint) +QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint)) vectorofactions = "inputdata" @@ -162,9 +165,10 @@ pixel_number = length(maldi_data) minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) - maxfeatures = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) + mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) - transformed = c(minmz, maxmz,maxfeatures, medint) + number_features = length(unique(unlist(lapply(maldi_data,mass)))) + transformed = c(minmz, maxmz,number_features,mean_features, medint) QC_numbers= cbind(QC_numbers, transformed) vectorofactions = append(vectorofactions, "transformed") @@ -196,9 +200,10 @@ pixel_number = length(maldi_data) minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) - maxfeatures = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) + mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) - smoothed = c(minmz, maxmz,maxfeatures, medint) + number_features = length(unique(unlist(lapply(maldi_data,mass)))) + smoothed = c(minmz, maxmz,number_features,mean_features, medint) QC_numbers= cbind(QC_numbers, smoothed) vectorofactions = append(vectorofactions, "smoothed") @@ -251,9 +256,10 @@ pixel_number = length(maldi_data) minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) - maxfeatures = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) + mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) - baseline_removed = c(minmz, maxmz,maxfeatures, medint) + number_features = length(unique(unlist(lapply(maldi_data,mass)))) + baseline_removed = c(minmz, maxmz,number_features,mean_features, medint) QC_numbers= cbind(QC_numbers, baseline_removed) vectorofactions = append(vectorofactions, "baseline_removed") @@ -263,13 +269,13 @@ print('calibrate') ##calibrate - #if $method.methods_conditional.mass_start != 0 and $method.methods_conditional.mass_end != 0: - ## calibrate only given m/z range - maldi_data = calibrateIntensity(maldi_data, - method="$method.methods_conditional.calibrate_method", - range=c($method.methods_conditional.mass_start, $method.methods_conditional.mass_end)) + #if str($method.methods_conditional.cond_calibration_range) == "yes": + ## calibrate only given m/z range + maldi_data = calibrateIntensity(maldi_data, + method="$method.methods_conditional.calibrate_method", + range=c($method.methods_conditional.cond_calibration_range.mass_start, $method.methods_conditional.cond_calibration_range.mass_end)) #else: - maldi_data = calibrateIntensity(maldi_data, + maldi_data = calibrateIntensity(maldi_data, method="$method.methods_conditional.calibrate_method") #end if ## QC plot and numbers @@ -278,10 +284,11 @@ pixel_number = length(maldi_data) minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) - maxfeatures = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) + mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) - intensity_calibrated = c(minmz, maxmz,maxfeatures, medint) - QC_numbers= cbind(QC_numbers, intensity_calibrated ) + number_features = length(unique(unlist(lapply(maldi_data,mass)))) + intensity_calibrated = c(minmz, maxmz,number_features,mean_features, medint) + QC_numbers= cbind(QC_numbers, intensity_calibrated) vectorofactions = append(vectorofactions, "intensity_calibrated ") @@ -333,16 +340,17 @@ pixel_number = length(maldi_data) minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) - maxfeatures = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) + mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) - spectra_aligned = c(minmz, maxmz,maxfeatures, medint) - QC_numbers= cbind(QC_numbers, spectra_aligned ) + number_features = length(unique(unlist(lapply(maldi_data,mass)))) + spectra_aligned = c(minmz, maxmz,number_features,mean_features, medint) + QC_numbers= cbind(QC_numbers, spectra_aligned) vectorofactions = append(vectorofactions, "spectra_aligned") #end if #end for -rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\nintensity") +rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# features", "median\nintensity") plot(0,type='n',axes=FALSE,ann=FALSE) grid.table(t(QC_numbers)) @@ -356,17 +364,13 @@ MALDIquantForeign::exportImzMl(maldi_data, file="out.imzMl", processed=$export_processed) #end if - ## export annotation tabular file - #if str($tabular_annotation.load_annotation) == 'yes_annotation': - write.table(merged_annotation, file="$annotation_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") - #end if }else{"All spectra are empty, outputfiles will be empty,too."} ]]> </configfile> </configfiles> <inputs> - <param name="infile" type="data" format="imzml,rdata" label="Inputfile as imzML or Cardinal MSImageSet saved as RData" help="This file is in imzML format or Cardinal MSImageSet saved as RData. The file must be in profile mode, not centroided"/> + <param name="infile" type="data" format="imzml,rdata,analyze75" label="Inputfile as imzML or Cardinal MSImageSet saved as RData" help="This file is in imzML format or Cardinal MSImageSet saved as RData. The file must be in profile mode, not centroided"/> <conditional name="restriction_conditional"> <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files"> <option value="no_restriction" selected="True">Calculate on entire file</option> @@ -374,7 +378,7 @@ </param> <when value="restrict"> <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/> - <param name="coordinates_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/> + <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> </when> <when value="no_restriction"/> </conditional> @@ -389,7 +393,7 @@ <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/> <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/> <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/> - <param name="tabular_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/> + <param name="tabular_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> </when> <when value="no_annotation"/> </conditional> @@ -404,7 +408,7 @@ <validator type="empty_field" /> </param> <when value="Transformation"> - <param name="transform_method" type="select" label="Select the transfprormation method"> + <param name="transform_method" type="select" label="Select a transfprormation method"> <option value="sqrt" selected="True">sqrt</option> <option value="log">log</option> <option value="log2">log2</option> @@ -419,18 +423,18 @@ <option value="MovingAverage">MovingAverage</option> </param> <when value="SavitzkyGolay"> - <param name="polynomial" value="3" type="text" label="PolynomialOrder argument to control the order of the filter"/> + <param name="polynomial" value="3" type="text" label="PolynomialOrder argument to control the order of the filter" + help="should be smaller than the resulting window"/> </when> <when value="MovingAverage"> - <param name="weighted" type="boolean" label="Weighted average" help = "indicates if the average should be equal weight or if it should have weights depending on the distance from the center as calculated as 1/2^abs(-halfWindowSize:halfWindowSize) with the sum of all weigths normalized to 1" truevalue="TRUE" falsevalue="FALSE"/> + <param name="weighted" type="boolean" label="Weighted average" help = "Indicates if the average should be equal weight or if it should have weights depending on the distance from the center as calculated as 1/2^abs(-halfWindowSize:halfWindowSize) with the sum of all weigths normalized to 1" truevalue="TRUE" falsevalue="FALSE"/> </when> </conditional> <param name="halfWindowSize" type="integer" value="10" - label="Half window size" + label="Half window size (number of data points)" help="The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] - (window size is 2*halfWindowSize+1). - The best size differs depending on the selected smoothing method."/> + (window size is 2*halfWindowSize+1)."/> </when> <when value="Baseline"> <conditional name="methods_for_baseline"> @@ -447,29 +451,40 @@ </when> <when value="TopHat"> <param name="tophat_halfWindowSize" type="integer" value="10" - label="Half window size" help="The resulting window reaches from + label="Half window size (number of data points)" + help="The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> </when> <when value="ConvexHull"/> <when value="median"> <param name="median_halfWindowSize" type="integer" value="10" - label="Half window size" help="The resulting window reaches from + label="Half window size (number of data points)" + help="The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> </when> </conditional> </when> <when value="Calibrate"> - <param name="calibrate_method" type="select" label="Calibration method"> + <param name="calibrate_method" type="select" label="Intensity calibration (normalization) method"> <option value="TIC" selected="True">TIC</option> <option value="PQN">PQN</option> <option value="median">median</option> <validator type="empty_field" /> </param> - <param name="mass_start" type="integer" value="0" - label="Start of m/z range, has to be inside m/z range" - help="Scaling factor is calculated on the mass range and applied to the whole spectrum. Start and end are not allowed to be 0"/> - <param name="mass_end" type="integer" value="0" - label="End of m/z range, has to be inside m/z range"/> + <conditional name="cond_calibration_range"> + <param name="calibration_range" type="select" label="Instead of the whole m/z range, a specified m/z range can be used to calculate the scaling factor"> + <option value="no" selected="True">complete m/z range</option> + <option value="yes">specify a m/z range</option> + </param> + <when value="no"/> + <when value="yes"> + <param name="mass_start" type="integer" value="800" + label="Start of m/z range, has to be inside m/z range" + help="Scaling factor is calculated on the mass range and applied to the whole spectrum."/> + <param name="mass_end" type="integer" value="3000" + label="End of m/z range, has to be inside m/z range"/> + </when> + </conditional> </when> <when value="Align"> <param name="warping_method" type="select" label="Warping methods"> @@ -479,12 +494,12 @@ <option value="cubic">Cubic</option> </param> - <param name="tolerance" type="float" value="0.002" - label="Tolerance" - help="Double, maximal relative deviation of a peak position (m/z) to be considered as identical" /> + <param name="tolerance" type="float" value="0.00005" + label="Tolerance = abs(mz1 - mz2)/mz2" + help="Maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 50e-6" /> <param name="halfWindowSize" type="integer" value="20" - label="Half window size" + label="Half window size (number of data points)" help="The resulting window reaches from mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] (window size is 2*halfWindowSize+1). @@ -492,7 +507,7 @@ <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio"/> <param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/> - <param name="empty_nomatch" type="boolean" label="logical, if TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/> + <param name="empty_nomatch" type="boolean" label="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/> <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE" help="For Cardinal RData files this step can only be performed if pixel annotations were provided"/> <conditional name="reference_for_alignment"> @@ -503,9 +518,9 @@ <when value="no_reference"/> <when value="yes_reference"> <param name="reference_file" type="data" format="tabular" - label="Tabular file with m/z of internal calibrants (MassPeaks) which should be used for spectra alignment" - help="calibration of m/z values to internal calibrants, at least 2 m/z per spectrum are needed"/> - <param name="reference_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/> + label="Tabular file with m/z (MassPeaks) which should be used for spectra alignment" + help="At least 2 reference m/z per spectrum are needed"/> + <param name="reference_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> </when> </conditional> </when> @@ -514,11 +529,8 @@ <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" truevalue="TRUE" falsevalue="FALSE"/> </inputs> <outputs> - <data format="imzml" name="outfile_imzml" label="$infile.display_name preprocessed" /> - <data format="pdf" name="plots" from_work_dir="prepro_qc_plot.pdf" label="$infile.display_name preprocessed QC"/> - <data format="tabular" name="annotation_output" label="$infile.display_name annotations"> - <filter>tabular_annotation["load_annotation"] == 'yes_annotation'</filter> - </data> + <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}" /> + <data format="pdf" name="plots" from_work_dir="prepro_qc_plot.pdf" label="${tool.name} on ${on_string}: QC"/> </outputs> <tests> <test> @@ -569,6 +581,7 @@ <param name="method" value="Align"/> <param name="warping_method" value="linear"/> <param name="halfWindowSize" value="1"/> + <param name="tolerance" value="0.002"/> <param name="allow_nomatch" value="TRUE"/> <param name="remove_empty" value="TRUE"/> <param name="empty_nomatch" value="TRUE"/> @@ -580,7 +593,6 @@ <output name="outfile_imzml" file="outfile3.imzML" compare="sim_size"/> <output name="outfile_imzml" file="outfile3.ibd" compare="sim_size"/> <output name="plots" file="Preprocessing3_QC.pdf" compare="sim_size"/> - <output name="annotation_output" file="annotations_output3.tabular"/> </test> </tests> <help><