diff spectralMatching.xml @ 12:e131f2fb0f97 draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit bbbcc75d51c020446fd00d76f908a6250266cfea
author tomnl
date Fri, 13 Sep 2019 11:48:31 -0400
parents 103d2613b3a7
children 8dd98b0f83af
line wrap: on
line diff
--- a/spectralMatching.xml	Thu Jun 27 13:08:00 2019 -0400
+++ b/spectralMatching.xml	Fri Sep 13 11:48:31 2019 -0400
@@ -18,55 +18,58 @@
         spectralMatching.R
             --outDir=.
             --cores=\${GALAXY_SLOTS:-4}
-          
 
-            #if $Query.q_dbPth_con.q_dbPth_select == 'userdb'
+            #if $Query.q_dbPth_con.q_dbPth_select == 'msPurityData'
+              --q_defaultDb
+            #else if $Query.q_dbPth_con.q_dbPth_select == 'sqlite'
               --q_dbPth=$Query.q_dbPth_con.q_dbPth
-            #else
-              --q_defaultDb
             #end if
-            
-            #if $Library.l_dbPth_con.l_dbPth_select == 'userdb'
+
+            #if $Library.l_dbPth_con.l_dbPth_select == 'msPurityData'
+              --l_defaultDb
+            #else if $Library.l_dbPth_con.l_dbPth_select == 'userdb_sqlite'
               --l_dbPth=$Library.l_dbPth_con.l_dbPth
-            #else
-              --l_defaultDb
             #end if
-            
+
+            --l_dbType=$Library.l_dbPth_con.l_dbPth_select
+            --q_dbType=$Query.q_dbPth_con.q_dbPth_select
+
+
             --q_ppmPrec=$Query.q_filters.q_ppmPrec
             --l_ppmPrec=$Library.l_filters.l_ppmPrec
-            
+
             --q_ppmProd=$Query.q_filters.q_ppmProd
             --l_ppmProd=$Library.l_filters.l_ppmProd
-            
-            
+
+
             #if $Query.q_filters.q_raThres_cond.q_raThres_bool
               --q_raThres=$Query.q_filters.q_raThres_cond.q_raThres
             #end if
-            
+
             #if $Library.l_filters.l_raThres_cond.l_raThres_bool
               --l_raThres=$Library.l_filters.l_raThres_cond.l_raThres
             #end if
-            
+
             #if $Query.q_filters.q_polarity_cond.q_polarity_bool
               --q_polarity=$Query.q_filters.q_polarity_cond.q_polarity
             #end if
-            
+
             #if $Library.l_filters.l_polarity_cond.l_polarity_bool
               --l_polarity=$Library.l_filters.l_polarity_cond.l_polarity
             #end if
-            
+
             #if $Query.q_filters.q_purity_cond.q_purity_bool
               --q_purity=$Query.q_filters.q_purity_cond.q_purity
             #end if
-            
+
             #if $Library.l_filters.l_purity_cond.l_purity_bool
               --l_purity=$Library.l_filters.l_purity_cond.l_purity
             #end if
-            
+
             #if $Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups_bool
               --q_xcmsGroups=$Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups
             #end if
-            
+
             #if $Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups_bool
               --l_xcmsGroups=$Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups
             #end if
@@ -74,54 +77,54 @@
             #if $Query.q_filters.q_pids_cond.q_pids_bool
               --q_pids=$Query.q_filters.q_pids_cond.q_pids
             #end if
-            
+
             #if $Library.l_filters.l_pids_cond.l_pids_bool
               --l_pids=$Library.l_filters.l_pids_cond.l_pids
             #end if
-            
+
             #if $Query.q_filters.q_rtrange_cond.q_rtrange_bool
               --q_rtrangeMin=$Query.q_filters.q_rtrange_cond.q_rtrangeMin
               --q_rtrangeMax=$Query.q_filters.q_rtrange_cond.q_rtrangeMax
             #end if
-            
+
             #if $Library.l_filters.l_rtrange_cond.l_rtrange_bool
               --l_rtrangeMin=$Library.l_filters.l_rtrange_cond.l_rtrangeMin
               --l_rtrangeMax=$Library.l_filters.l_rtrange_cond.l_rtrangeMax
             #end if
-            
+
             #if $Query.q_filters.q_accessions_cond.q_accessions_bool
               --q_accessions=$Query.q_filters.q_accessions_cond.q_accessions
             #end if
-            
+
             #if $Library.l_filters.l_accessions_cond.l_accessions_bool
               --l_accessions=$Library.l_filters.l_accessions_cond.l_accessions
             #end if
-            
-            
+
+
             #if $Query.q_filters.q_sources_cond.q_sources_bool
               --q_sources=$Query.q_filters.q_sources_cond.q_sources
               --q_sourcesUser=$Query.q_filters.q_sources_cond.q_sourcesUser
             #end if
-            
+
             #if $Library.l_filters.l_sources_cond.l_sources_bool
               --l_sources=$Library.l_filters.l_sources_cond.l_sources
               --l_sourcesUser=$Library.l_filters.l_sources_cond.l_sourcesUser
             #end if
-            
+
             #if $Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes_bool
               --q_instrumentTypes=$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes
               --q_instrumentTypesUser=$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypesUser
             #end if
-            
+
             #if $Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes_bool
               --l_instrumentTypes=$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes
               --l_instrumentTypesUser=$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypesUser
             #end if
-            
+
             #if $Query.q_filters.q_instruments_cond.q_instruments_bool
               --q_instruments=$Query.q_filters.q_instruments_cond.q_instruments
             #end if
-            
+
             #if $Library.l_filters.l_instruments_cond.l_instruments_bool
               --l_instruments=$Library.l_filters.l_instruments_cond.l_instruments
             #end if
@@ -129,7 +132,7 @@
             #if $Query.q_filters.q_spectraTypes_cond.q_spectraTypes_bool
               --q_spectraTypes=$Query.q_filters.q_spectraTypes_cond.q_spectraTypes
             #end if
-            
+
             #if $Library.l_filters.l_spectraTypes_cond.l_spectraTypes_bool
               --l_spectraTypes=$Library.l_filters.l_spectraTypes_cond.l_spectraTypes
             #end if
@@ -137,7 +140,7 @@
             #if $Query.q_filters.q_spectraFilter
               --q_spectraFilter
             #end if
-            
+
             #if $Library.l_filters.l_spectraFilter
               --l_spectraFilter
             #end if
@@ -145,80 +148,80 @@
             #if $General.rttol_cond.rttol_bool
               --rttol=$General.rttol_cond.rttol
             #end if
-            
+
             --raW=$General.raW
             --mzW=$General.mzW
-            
+
             #if $General.updateDb_cond.updateDb
               --updateDb
               #if $General.updateDb_cond.copyDb
                  --copyDb
               #end if
             #end if
-            
+
             #if $General.usePrecursors
                  --usePrecursors
             #end if
-       
+
 
 
     ]]></command>
     <inputs>
-      
+
 
-    
+
     <section name="Query" title="Query spectra input and filters" expanded="True">
         <expand macro="sm_input" ql='Query' ql_shrt = "q" user="True" mspuritydatalib="False" msp="False"
                 help="Query SQLite database - in the standard XCMS msPurity workflow - the output
-                      of msPurity.createDatabase should be used here. However any SQLite database 
+                      of msPurity.createDatabase should be used here. However any SQLite database
                       following the schema of xxx can be used as input"/>
         <expand macro="filters" ql="Query" ql_shrt="q"/>
     </section>
-  
+
     <section name="Library" title="Library spectra input and filters" expanded="True">
         <expand macro="sm_input" ql='Library' ql_shrt = "l" user="False" mspuritydatalib="True" msp="False"
-                help="Library SQLite database - in the standard XCMS msPurity workflow - a default 
-                      database of MassBank, HMDB, LipidBlast and GNPS is used. However any SQLite 
+                help="Library SQLite database - in the standard XCMS msPurity workflow - a default
+                      database of MassBank, HMDB, LipidBlast and GNPS is used. However any SQLite
                       database following the schema of xxx can be used as input"/>
         <expand macro="filters" ql="Library"  ql_shrt="l"/>
     </section>
-    
+
     <section name="General" title="General arguments" expanded="False">
         <conditional name="rttol_cond">
-                <param name="rttol_bool" type="boolean" label="Filter on retention time match?" 
+                <param name="rttol_bool" type="boolean" label="Filter on retention time match?"
                        help="" />
                 <when value="true">
-                    <param name="rttol" type="float" value="30" min="0" 
+                    <param name="rttol" type="float" value="30" min="0"
                            label="Retention time tolerance (seconds)"
                            help="Retention time tolerance in seconds to match precursors"/>
                 </when>
                 <when value="false">
                 </when>
         </conditional>
-        
-        
-        <param name="usePrecursors" type="boolean" checked="true" label="Filter on matching precursors?" 
-                  help="If True, spectra will be filtered by similarity of precursors based on 
+
+
+        <param name="usePrecursors" type="boolean" checked="true" label="Filter on matching precursors?"
+                  help="If True, spectra will be filtered by similarity of precursors based on
                         the library and query ppm defined tolerance" />
-         
-        <param name="raW" label="Weighting for relative abundance" 
+
+        <param name="raW" label="Weighting for relative abundance"
                type="float" value="0.5"
                help="Relative abundance weight for spectra (default to 0.5 as determined by
                      massbank for ESI data)"/>
-          
-        <param name="mzW" label="Weighting for mz" 
+
+        <param name="mzW" label="Weighting for mz"
                type="float" value="2"
                help="mz weight for spectra (default to 2 as determined by massbank for ESI data)"/>
-              
+
         <conditional name="updateDb_cond">
                 <param name="updateDb" type="boolean" checked="true"
                        label="Update database with results?" help="" />
                 <when value="true">
-                    <param name="copyDb" type="boolean" checked="true" 
+                    <param name="copyDb" type="boolean" checked="true"
                            label="Make a copy of the database?"
-                           help="A copy will be made of the input SQLite target database and the 
-                                 results will be added to this copy.  When False, the input SQLite 
-                                 database will be updated  with the matching results. Use False if 
+                           help="A copy will be made of the input SQLite target database and the
+                                 results will be added to this copy.  When False, the input SQLite
+                                 database will be updated  with the matching results. Use False if
                                  you want to reduce storage space being used."/>
                 </when>
                 <when value="false">
@@ -228,8 +231,8 @@
 
 
         </section>
-    
-        
+
+
 
 
     </inputs>
@@ -274,44 +277,44 @@
 -----------
 
 
-Perform spectral matching to spectral libraries for an LC-MS/MS dataset. 
+Perform spectral matching to spectral libraries for an LC-MS/MS dataset.
 
-The spectral matching is performed from a **Query** SQLite spectral-database against a **Library** SQLite spectral-database. 
+The spectral matching is performed from a **Query** SQLite spectral-database against a **Library** SQLite spectral-database.
 
 The SQLite schema of the spectral database here: spectral_database_schema_
 
 
-The query spectral-database in most cases should contain be the "unknown" spectra database generated the msPurity 
-function createDatabase as part of a msPurity-XCMS data processing workflow. 
+The query spectral-database in most cases should contain be the "unknown" spectra database generated the msPurity
+function createDatabase as part of a msPurity-XCMS data processing workflow.
 
-The library spectral-database in most cases should contain the "known" spectra from either public or user generated resources. 
-The library SQLite database by default contains data from MoNA including Massbank, HMDB, LipidBlast and GNPS. 
+The library spectral-database in most cases should contain the "known" spectra from either public or user generated resources.
+The library SQLite database by default contains data from MoNA including Massbank, HMDB, LipidBlast and GNPS.
 A larger_database_ can be download and used from the msp2db github repository.
 
-To create a user generated library SQLite database the following tool can be used to generate a SQLite database 
+To create a user generated library SQLite database the following tool can be used to generate a SQLite database
 from a collection of MSP files: msp2db_.
 
-It should be noted though, that as long as the schema of the spectral-database is as described here, then any database can be used 
-for either the library or query -  even allowing for the same database to be used. 
+It should be noted though, that as long as the schema of the spectral-database is as described here, then any database can be used
+for either the library or query -  even allowing for the same database to be used.
 
-The spectral matching functionality has four main components, spectral filtering, spectral alignment, spectral matching, 
-and summarising the results. 
+The spectral matching functionality has four main components, spectral filtering, spectral alignment, spectral matching,
+and summarising the results.
 
-Spectral filtering is simply filtering both the library and query spectra to be search against (e.g. choosing 
-the library source, instrument, retention time, precursor PPM tolerance etc). 
+Spectral filtering is simply filtering both the library and query spectra to be search against (e.g. choosing
+the library source, instrument, retention time, precursor PPM tolerance etc).
 
-The spectral alignment stage involves aligning the query peaks to the library peaks. The approach used is similar 
-to modified pMatch algorithm described in Zhou et al 2015. 
+The spectral alignment stage involves aligning the query peaks to the library peaks. The approach used is similar
+to modified pMatch algorithm described in Zhou et al 2015.
 
-The spectral matching of the aligned spectra is performed against a combined intensity and m/z weighted vector - created for both 
-the query and library spectra (wq and wl). See below: 
+The spectral matching of the aligned spectra is performed against a combined intensity and m/z weighted vector - created for both
+the query and library spectra (wq and wl). See below:
 
 .. math::
 
     w=intensity^x \cdot mz^y
 
 
-Where x and y represent weight factors and can be adjusted with the parameters raW and mzW. 
+Where x and y represent weight factors and can be adjusted with the parameters raW and mzW.
 Defaults to x=0.5 and y=2 as per MassBank for ESI based mass spectrometry data.
 
 The aligned weighted vectors are then matched using dot product cosine, reverse dot product cosine and the composite dot product.
@@ -343,49 +346,39 @@
 
 The updated query database (this will have been updated with the annotation results if updateDb argument used)
 
-**matchedResults**
-
-All matched results from the query spectra to the library spectra. Contains the following columns
-
-* dpc - dot product cosine of the match
-* rdpc - reverse dot product cosine of the match
-* cdpc - composite dot product cosine of the match
-* mcount - number of matching peaks
-* allcount - total number of peaks across both query and library spectra
-* mpercent - percentage of matching peaks across both query and library spectra
-* accession -  accession of library match
-* name - name of library match
-* inchikey - inchikey of library match
-* lpid - pid in database of library match
-* qpid - pid in database of query match
-* mid - id of the match
 
 **xcmsMatchedResults**
 
 If the qeury spectra had XCMS based chromotographic peaks tables (e.g c_peak_groups, c_peaks) in the sqlite database - it will
 be possible to summarise the matches for each XCMS grouped feature. The dataframe contains the following columns
 
-* pid - pid in database of query match
-* grpid - grpid of the XCMS grouped feature for query match
-* mz - derived from XCMS grouped feature
-* mzmin - derived from XCMS grouped feature
-* mzmax - derived from XCMS grouped feature
-* rt - derived from XCMS grouped feature
-* rtmin - derived from XCMS grouped feature
-* rtmax - derived from XCMS grouped feature
-* npeaks - derived from XCMS grouped feature
-* grp_name - derived from XCMS grouped feature
+* lpid - id in database of library spectra
+* qpid - id in database of query spectra
 * dpc - dot product cosine of the match
 * rdpc - reverse dot product cosine of the match
 * cdpc - composite dot product cosine of the match
 * mcount - number of matching peaks
 * allcount - total number of peaks across both query and library spectra
 * mpercent - percentage of matching peaks across both query and library spectra
-* accession -  accession of library match
-* name - name of library match
-* inchikey - inchikey of library match
-* lpid - pid in database of library match
-* mid - id of the match
+* library_rt - retention time of library spectra
+* query_rt - retention time of query spectra
+* rtdiff - difference between library and query retention time
+* library_precursor_mz - library precursor mz
+* query_precursor_mz - query precursor mz
+* library_precursor_ion_purity - library precursor ion purity
+* query_precursor_ion_purity - query precursor ion purity
+* library_accession -  library accession value (unique string or number given to eith MoNA or Massbank data entires)
+* library_precursor_type - library precursor type (i.e. adduct)
+* library_entry_name - Name given to the library spectra
+* inchikey - inchikey of the matched library spectra
+* library_source_name - source of the spectra (e.g. massbank, gnps)
+* library_compound_name - name of compound spectra was obtained from
+
+**matchedResults**
+
+All matched results from the query spectra to the library spectra. Contains the same as above
+without the XCMS details. This table is useful to observe spectral matching results
+for all MS/MS spectra irrespective of if they are linked to XCMS MS1 features.
 
 
 .. _spectral_database_schema: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-datatabase-schema.html