Mercurial > repos > computational-metabolomics > mspurity_spectralmatching
comparison spectralMatching.xml @ 0:a8ab07c27338 draft default tip
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
| author | computational-metabolomics |
|---|---|
| date | Thu, 04 Mar 2021 12:20:23 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a8ab07c27338 |
|---|---|
| 1 <tool id="mspurity_spectralmatching" name="msPurity.spectralMatching" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@"> | |
| 2 <description> | |
| 3 Perform spectral matching to MS/MS spectral libraries | |
| 4 </description> | |
| 5 <macros> | |
| 6 <import>macros.xml</import> | |
| 7 </macros> | |
| 8 <expand macro="requirements"/> | |
| 9 <command detect_errors="exit_code"><![CDATA[ | |
| 10 Rscript '$__tool_directory__/spectralMatching.R' | |
| 11 --outDir=. | |
| 12 --cores=\${GALAXY_SLOTS:-4} | |
| 13 | |
| ## Select the query database: the packaged default or a user-supplied SQLite dataset. | |
| ## The dataset path is single-quoted so the shell always passes it as one argument. | |
| 14 #if $Query.q_dbPth_con.q_dbPth_select == 'msPurityData' | |
| 15 --q_defaultDb | |
| 16 #else if $Query.q_dbPth_con.q_dbPth_select == 'sqlite' | |
| 17 --q_dbPth='$Query.q_dbPth_con.q_dbPth' | |
| 18 #end if | |
| 19 | |
| ## Select the library database: the packaged default or a user-supplied SQLite dataset. | |
| ## The dataset path is single-quoted so the shell always passes it as one argument. | |
| 20 #if $Library.l_dbPth_con.l_dbPth_select == 'msPurityData' | |
| 21 --l_defaultDb | |
| 22 #else if $Library.l_dbPth_con.l_dbPth_select == 'sqlite' | |
| 23 --l_dbPth='$Library.l_dbPth_con.l_dbPth' | |
| 24 #end if | |
| 25 | |
| 26 --l_dbType=$Library.l_dbPth_con.l_dbPth_select | |
| 27 --q_dbType=$Query.q_dbPth_con.q_dbPth_select | |
| 28 | |
| 29 | |
| 30 --q_ppmPrec=$Query.q_filters.q_ppmPrec | |
| 31 --l_ppmPrec=$Library.l_filters.l_ppmPrec | |
| 32 | |
| 33 --q_ppmProd=$Query.q_filters.q_ppmProd | |
| 34 --l_ppmProd=$Library.l_filters.l_ppmProd | |
| 35 | |
| 36 | |
| 37 #if $Query.q_filters.q_raThres_cond.q_raThres_bool | |
| 38 --q_raThres=$Query.q_filters.q_raThres_cond.q_raThres | |
| 39 #end if | |
| 40 | |
| 41 #if $Library.l_filters.l_raThres_cond.l_raThres_bool | |
| 42 --l_raThres=$Library.l_filters.l_raThres_cond.l_raThres | |
| 43 #end if | |
| 44 | |
| 45 #if $Query.q_filters.q_polarity_cond.q_polarity_bool | |
| 46 --q_polarity=$Query.q_filters.q_polarity_cond.q_polarity | |
| 47 #end if | |
| 48 | |
| 49 #if $Library.l_filters.l_polarity_cond.l_polarity_bool | |
| 50 --l_polarity=$Library.l_filters.l_polarity_cond.l_polarity | |
| 51 #end if | |
| 52 | |
| 53 #if $Query.q_filters.q_purity_cond.q_purity_bool | |
| 54 --q_purity=$Query.q_filters.q_purity_cond.q_purity | |
| 55 #end if | |
| 56 | |
| 57 #if $Library.l_filters.l_purity_cond.l_purity_bool | |
| 58 --l_purity=$Library.l_filters.l_purity_cond.l_purity | |
| 59 #end if | |
| 60 | |
| 61 #if $Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups_bool | |
| 62 --q_xcmsGroups=$Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups | |
| 63 #end if | |
| 64 | |
| 65 #if $Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups_bool | |
| 66 --l_xcmsGroups=$Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups | |
| 67 #end if | |
| 68 | |
| 69 #if $Query.q_filters.q_pids_cond.q_pids_bool | |
| 70 --q_pids=$Query.q_filters.q_pids_cond.q_pids | |
| 71 #end if | |
| 72 | |
| 73 #if $Library.l_filters.l_pids_cond.l_pids_bool | |
| 74 --l_pids=$Library.l_filters.l_pids_cond.l_pids | |
| 75 #end if | |
| 76 | |
| 77 #if $Query.q_filters.q_rtrange_cond.q_rtrange_bool | |
| 78 --q_rtrangeMin=$Query.q_filters.q_rtrange_cond.q_rtrangeMin | |
| 79 --q_rtrangeMax=$Query.q_filters.q_rtrange_cond.q_rtrangeMax | |
| 80 #end if | |
| 81 | |
| 82 #if $Library.l_filters.l_rtrange_cond.l_rtrange_bool | |
| 83 --l_rtrangeMin=$Library.l_filters.l_rtrange_cond.l_rtrangeMin | |
| 84 --l_rtrangeMax=$Library.l_filters.l_rtrange_cond.l_rtrangeMax | |
| 85 #end if | |
| 86 | |
| 87 #if $Query.q_filters.q_accessions_cond.q_accessions_bool | |
| 88 --q_accessions=$Query.q_filters.q_accessions_cond.q_accessions | |
| 89 #end if | |
| 90 | |
| 91 #if $Library.l_filters.l_accessions_cond.l_accessions_bool | |
| 92 --l_accessions=$Library.l_filters.l_accessions_cond.l_accessions | |
| 93 #end if | |
| 94 | |
| 95 | |
| 96 #if $Query.q_filters.q_sources_cond.q_sources_bool | |
| 97 --q_sources=$Query.q_filters.q_sources_cond.q_sources | |
| 98 --q_sourcesUser='$Query.q_filters.q_sources_cond.q_sourcesUser' | |
| 99 #end if | |
| 100 | |
| 101 #if $Library.l_filters.l_sources_cond.l_sources_bool | |
| 102 --l_sources=$Library.l_filters.l_sources_cond.l_sources | |
| 103 --l_sourcesUser='$Library.l_filters.l_sources_cond.l_sourcesUser' | |
| 104 #end if | |
| 105 | |
| 106 #if $Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes_bool | |
| 107 --q_instrumentTypes='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes' | |
| 108 --q_instrumentTypesUser='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypesUser' | |
| 109 #end if | |
| 110 | |
| 111 #if $Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes_bool | |
| 112 --l_instrumentTypes='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes' | |
| 113 --l_instrumentTypesUser='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypesUser' | |
| 114 #end if | |
| 115 | |
| ## Optional query instrument filter. The value is single-quoted, as the library-side | |
| ## --l_instruments option is, so a value containing spaces stays a single argument. | |
| 116 #if $Query.q_filters.q_instruments_cond.q_instruments_bool | |
| 117 --q_instruments='$Query.q_filters.q_instruments_cond.q_instruments' | |
| 118 #end if | |
| 119 | |
| 120 #if $Library.l_filters.l_instruments_cond.l_instruments_bool | |
| 121 --l_instruments='$Library.l_filters.l_instruments_cond.l_instruments' | |
| 122 #end if | |
| 123 | |
| 124 #if $Query.q_filters.q_spectraTypes_cond.q_spectraTypes_bool | |
| 125 --q_spectraTypes=$Query.q_filters.q_spectraTypes_cond.q_spectraTypes | |
| 126 #end if | |
| 127 | |
| 128 #if $Library.l_filters.l_spectraTypes_cond.l_spectraTypes_bool | |
| 129 --l_spectraTypes=$Library.l_filters.l_spectraTypes_cond.l_spectraTypes | |
| 130 #end if | |
| 131 | |
| 132 #if $Query.q_filters.q_spectraFilter | |
| 133 --q_spectraFilter | |
| 134 #end if | |
| 135 | |
| 136 #if $Library.l_filters.l_spectraFilter | |
| 137 --l_spectraFilter | |
| 138 #end if | |
| 139 | |
| 140 #if $General.rttol_cond.rttol_bool | |
| 141 --rttol=$General.rttol_cond.rttol | |
| 142 #end if | |
| 143 | |
| 144 --raW=$General.raW | |
| 145 --mzW=$General.mzW | |
| 146 | |
| 147 #if $General.updateDb_cond.updateDb | |
| 148 --updateDb | |
| 149 #if $General.updateDb_cond.copyDb | |
| 150 --copyDb | |
| 151 #end if | |
| 152 #end if | |
| 153 | |
| 154 #if $General.usePrecursors | |
| 155 --usePrecursors | |
| 156 #end if | |
| 157 | |
| 158 ]]></command> | |
| 159 <inputs> | |
| 160 <section name="Query" title="Query spectra input and filters" expanded="True"> | |
| <!-- Query-side database input and spectra filters; both are produced by expanding the sm_input and filters macros. --> | |
| 161 <expand macro="sm_input" ql="Query" ql_shrt = "q" user="True" mspuritydatalib="False" msp="False" | |
| 162 help="Query SQLite database - in the standard XCMS msPurity workflow - the output | |
| 163 of msPurity.createDatabase should be used here. However any SQLite database | |
| 164 following the schema of https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/> | |
| 165 <expand macro="filters" ql="Query" ql_shrt="q"/> | |
| 166 </section> | |
| 167 <section name="Library" title="Library spectra input and filters" expanded="True"> | |
| 168 <expand macro="sm_input" ql="Library" ql_shrt = "l" user="False" mspuritydatalib="True" msp="False" | |
| 169 help="Library SQLite database - in the standard XCMS msPurity workflow - a default | |
| 170 database of MassBank, HMDB, LipidBlast and GNPS is used. However any SQLite | |
| 171 database following the schema of https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/> | |
| 172 <expand macro="filters" ql="Library" ql_shrt="l"/> | |
| 173 </section> | |
| 174 <section name="General" title="General arguments" expanded="False"> | |
| 175 <conditional name="rttol_cond"> | |
| 176 <param name="rttol_bool" type="boolean" label="Filter on retention time match?" | |
| 177 help="" /> | |
| 178 <when value="true"> | |
| 179 <param name="rttol" type="float" value="30" min="0" | |
| 180 label="Retention time tolerance (seconds)" | |
| 181 help="Retention time tolerance in seconds to match precursors"/> | |
| 182 </when> | |
| 183 <when value="false"/> | |
| 184 </conditional> | |
| 185 <param name="usePrecursors" type="boolean" checked="true" label="Filter on matching precursors?" | |
| 186 help="If True, spectra will be filtered by similarity of precursors based on | |
| 187 the library and query ppm defined tolerance" /> | |
| 188 <param name="raW" label="Weighting for relative abundance" type="float" value="0.5" | |
| 189 help="Relative abundance weight for spectra (default to 0.5 as determined by | |
| 190 massbank for ESI data)"/> | |
| 191 <param name="mzW" label="Weighting for mz" type="float" min="0" value="2" | |
| 192 help="mz weight for spectra (default to 2 as determined by massbank for ESI data)"/> | |
| 193 <conditional name="updateDb_cond"> | |
| 194 <param name="updateDb" type="boolean" checked="true" | |
| 195 label="Update database with results?" help="" /> | |
| 196 <when value="true"> | |
| 197 <param name="copyDb" type="boolean" checked="true" | |
| 198 label="Make a copy of the database?" | |
| 199 help="A copy will be made of the input SQLite target database and the | |
| 200 results will be added to this copy. When False, the input SQLite | |
| 201 database will be updated with the matching results. Use False if | |
| 202 you want to reduce storage space being used."/> | |
| 203 </when> | |
| 204 <when value="false"/> | |
| 205 </conditional> | |
| 206 </section> | |
| 207 </inputs> | |
| 208 | |
| 209 <outputs> | |
| <!-- Declares three datasets, each collected from a fixed file name in the job working directory (from_work_dir). | |
| NOTE(review): the filter expressions below reference create_new_database and spectra_type_q, but no parameters | |
| with those names are declared in the inputs visible in this file. Confirm they are provided by the imported | |
| macros; otherwise the filters cannot evaluate as intended. --> | |
| 210 <data name="sqlite_results" format="sqlite" label="${tool.name} on ${on_string}: SQLite results" | |
| 211 from_work_dir="db_with_spectral_matching.sqlite" > | |
| 212 <filter>create_new_database is True</filter> | |
| 213 </data> | |
| 214 <data name="matches" format="tsv" label="${tool.name} on ${on_string}: matches" | |
| 215 from_work_dir="matched_results.tsv" > | |
| 216 <filter>spectra_type_q == "scans"</filter> | |
| 217 </data> | |
| 218 <data name="xcms_matches" format="tsv" label="${tool.name} on ${on_string}: XCMS matches" | |
| 219 from_work_dir="xcms_matched_results.tsv" /> | |
| 220 </outputs> | |
| 221 <tests> | |
| <!-- Two functional tests sharing the same query/library fixtures; the second additionally enables the | |
| library instrument-type filter. | |
| NOTE(review): in both tests the xcms_matches output is compared against the "..._matched_results..." fixture | |
| and the matches output against the "..._xcms_matched_results..." fixture. Confirm this pairing is intended. | |
| NOTE(review): l_dbPth_select is set to "userdb", while the command template only tests for 'msPurityData' | |
| and 'sqlite'. Confirm against the sm_input macro that this option value exists. --> | |
| 222 <test> | |
| 223 <param name="q_dbPth" value="createDatabase_output.sqlite" /> | |
| 224 <param name="q_spectraTypes_bool" value="true" /> | |
| 225 <param name="q_spectraTypes" value="inter,av_all" /> | |
| 226 <param name="l_dbPth_select" value="userdb" /> | |
| 227 <param name="l_dbPth" value="PR100037.sqlite" /> | |
| 228 <param name="q_xcmsGroups_bool" value="true" /> | |
| 229 <param name="l_accessions_bool" value="true" /> | |
| 230 <param name="q_xcmsGroups" value="14" /> | |
| 231 <param name="l_accessions" value="PR100037" /> | |
| 232 <output name="xcms_matches" file="spectralMatching_matched_results.tsv" /> | |
| 233 <output name="matches" file="spectralMatching_xcms_matched_results.tsv" /> | |
| 234 <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching.sqlite" ftype="sqlite" compare="sim_size"/> | |
| 235 </test> | |
| 236 <test> | |
| 237 <param name="l_instrumentTypes_bool" value="true" /> | |
| 238 <param name="q_dbPth" value="createDatabase_output.sqlite" /> | |
| 239 <param name="q_spectraTypes_bool" value="true" /> | |
| 240 <param name="q_spectraTypes" value="inter,av_all" /> | |
| 241 <param name="l_dbPth_select" value="userdb" /> | |
| 242 <param name="l_dbPth" value="PR100037.sqlite" /> | |
| 243 <param name="q_xcmsGroups_bool" value="true" /> | |
| 244 <param name="l_accessions_bool" value="true" /> | |
| 245 <param name="q_xcmsGroups" value="14" /> | |
| 246 <param name="l_accessions" value="PR100037" /> | |
| 247 <output name="xcms_matches" file="spectralMatching_matched_results_instrumentTypes.tsv" /> | |
| 248 <output name="matches" file="spectralMatching_xcms_matched_results_instrumentTypes.tsv" /> | |
| 249 <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching_instrumentTypes.sqlite" ftype="sqlite" compare="sim_size"/> | |
| 250 </test> | |
| 251 </tests> | |
| 252 | |
| 253 <help><![CDATA[ | |
| 254 | |
| 255 ============================================================= | |
| 256 Spectral matching | |
| 257 ============================================================= | |
| 258 ----------- | |
| 259 General | |
| 260 ----------- | |
| 261 | |
| 262 | |
| 263 Perform spectral matching to spectral libraries for an LC-MS/MS dataset. | |
| 264 | |
| 265 The spectral matching is performed from a **Query** SQLite spectral-database against a **Library** SQLite spectral-database. | |
| 266 | |
| 267 The SQLite schema of the spectral database is described here: spectral_database_schema_ | |
| 268 | |
| 269 | |
| 270 The query spectral-database in most cases should be the "unknown" spectra database generated by the msPurity | |
| 271 function createDatabase as part of a msPurity-XCMS data processing workflow. | |
| 272 | |
| 273 The library spectral-database in most cases should contain the "known" spectra from either public or user generated resources. | |
| 274 The library SQLite database by default contains data from MoNA including Massbank, HMDB, LipidBlast and GNPS. | |
| 275 A larger_database_ can be downloaded and used from the msp2db github repository. | |
| 276 | |
| 277 To create a user generated library SQLite database the following tool can be used to generate a SQLite database | |
| 278 from a collection of MSP files: msp2db_. | |
| 279 | |
| 280 It should be noted though, that as long as the schema of the spectral-database is as described here, then any database can be used | |
| 281 for either the library or query - even allowing for the same database to be used. | |
| 282 | |
| 283 The spectral matching functionality has four main components, spectral filtering, spectral alignment, spectral matching, | |
| 284 and summarising the results. | |
| 285 | |
| 286 Spectral filtering is simply filtering both the library and query spectra to be searched against (e.g. choosing | |
| 287 the library source, instrument, retention time, precursor PPM tolerance etc). | |
| 288 | |
| 289 The spectral alignment stage involves aligning the query peaks to the library peaks. The approach used is similar | |
| 290 to the modified pMatch algorithm described in Zhou et al. 2015. | |
| 291 | |
| 292 The spectral matching of the aligned spectra is performed against a combined intensity and m/z weighted vector - created for both | |
| 293 the query and library spectra (wq and wl). See below: | |
| 294 | |
| 295 .. math:: | |
| 296 | |
| 297 w=intensity^x \cdot mz^y | |
| 298 | |
| 299 | |
| 300 Where x and y represent weight factors and can be adjusted with the parameters raW and mzW. | |
| 301 Defaults to x=0.5 and y=2 as per MassBank for ESI based mass spectrometry data. | |
| 302 | |
| 303 The aligned weighted vectors are then matched using dot product cosine, reverse dot product cosine and the composite dot product. | |
| 304 See below for dot product cosine equation. | |
| 305 | |
| 306 .. math:: | |
| 307 | |
| 308 dpc = \frac{ w_q \cdot w_l } { \sqrt{\sum{w_{q}^{2}} } \cdot \sqrt{\sum{w_{l}^{2}}}} | |
| 309 | |
| 310 | |
| 311 Full details of the matching approaches are described in the msPurity_spectral_matching_vignette_ | |
| 312 | |
| 313 -------------------------------------------- | |
| 314 Example LC-MS/MS processing workflow | |
| 315 -------------------------------------------- | |
| 316 | |
| 317 * Purity assessments | |
| 318 + (mzML files) -> purityA -> (pa) | |
| 319 * XCMS processing | |
| 320 + (mzML files) -> xcms.xcmsSet -> xcms.merge -> xcms.group -> xcms.retcor -> xcms.group -> (xset) | |
| 321 * Fragmentation processing | |
| 322 + (xset, pa) -> frag4feature -> filterFragSpectra -> averageAllFragSpectra -> createDatabase -> **spectralMatching** -> (sqlite spectral database) | |
| 323 | |
| 324 ----------- | |
| 325 Output | |
| 326 ----------- | |
| 327 | |
| 328 **Database** | |
| 329 | |
| 330 The updated query database (this will have been updated with the annotation results if the updateDb argument is used) | |
| 331 | |
| 332 | |
| 333 **xcmsMatchedResults** | |
| 334 | |
| 335 If the query spectra had XCMS based chromatographic peak tables (e.g. c_peak_groups, c_peaks) in the SQLite database - it will | |
| 336 be possible to summarise the matches for each XCMS grouped feature. The dataframe contains the following columns | |
| 337 | |
| 338 * lpid - id in database of library spectra | |
| 339 * qpid - id in database of query spectra | |
| 340 * dpc - dot product cosine of the match | |
| 341 * rdpc - reverse dot product cosine of the match | |
| 342 * cdpc - composite dot product cosine of the match | |
| 343 * mcount - number of matching peaks | |
| 344 * allcount - total number of peaks across both query and library spectra | |
| 345 * mpercent - percentage of matching peaks across both query and library spectra | |
| 346 * library_rt - retention time of library spectra | |
| 347 * query_rt - retention time of query spectra | |
| 348 * rtdiff - difference between library and query retention time | |
| 349 * library_precursor_mz - library precursor mz | |
| 350 * query_precursor_mz - query precursor mz | |
| 351 * library_precursor_ion_purity - library precursor ion purity | |
| 352 * query_precursor_ion_purity - query precursor ion purity | |
| 353 * library_accession - library accession value (unique string or number given to either MoNA or MassBank data entries) | |
| 354 * library_precursor_type - library precursor type (i.e. adduct) | |
| 355 * library_entry_name - Name given to the library spectra | |
| 356 * inchikey - inchikey of the matched library spectra | |
| 357 * library_source_name - source of the spectra (e.g. massbank, gnps) | |
| 358 * library_compound_name - name of compound spectra was obtained from | |
| 359 | |
| 360 **matchedResults** | |
| 361 | |
| 362 All matched results from the query spectra to the library spectra. Contains the same as above | |
| 363 without the XCMS details. This table is useful to observe spectral matching results | |
| 364 for all MS/MS spectra irrespective of if they are linked to XCMS MS1 features. | |
| 365 | |
| 366 | |
| 367 .. _spectral_database_schema: https://www.bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html | |
| 368 .. _larger_database: https://github.com/computational-metabolomics/msp2db/releases | |
| 369 .. _msp2db: https://github.com/computational-metabolomics/msp2db/releases | |
| 370 .. _msPurity_spectral_matching_vignette: https://www.bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-lcmsms-data-processing-and-spectral-matching-vignette.html | |
| 371 | |
| 372 ]]></help> | |
| 373 | |
| 374 <expand macro="citations"> </expand> | |
| 375 </tool> |
