Mercurial > repos > bgruening > peptideshaker

--- a/peptide_shaker.xml	Tue May 06 15:45:35 2014 -0400
+++ b/peptide_shaker.xml	Wed May 07 00:47:14 2014 -0400
@@ -1,6 +1,6 @@
 <tool id="peptide_shaker" name="Peptide Shaker" version="1.18.3.0">
     <description>
-        Peform protein identification combining X! Tandem and OMSSA (using SearchGUI) and PeptideShaker pipeline.
+        Perform protein identification using various search engines (using SearchGUI) and combine results with PeptideShaker.
     </description>
     <requirements>
         <requirement type="package" version="0.28.0">peptide_shaker</requirement>
@@ -244,9 +244,9 @@
     </command>
     <inputs>
         <param format="fasta" name="input_database" type="data" label="Protein Database"
-            help="Select FASTA database from history. Typically, a target-decoy database is incorporated into the Scaffold engine for FDR analysis"/>
+            help="Select FASTA database from history"/>

-        <param name="create_decoy" label="Create a concatenated target/decoy database before running PeptideShaker." type="boolean" truevalue="True" falsevalue="False" checked="true" />
+        <param name="create_decoy" label="Create a concatenated target/decoy database before running PeptideShaker." type="boolean" truevalue="True" falsevalue="False" checked="true" help="Selecting this option will help PeptideShaker calculate FDR values" />

         <param format="mgf" name="peak_lists" type="data" multiple="true" label="Input Peak Lists (mgf)"
             help="Select appropriate MGF dataset(s) from history" />
@@ -288,21 +288,21 @@
             <option value="Semi-Glu-C">Semi-Glu-C</option>
         </param>

-        <param name="correct_titles" type="select" label="Correct for duplicate spectrum titles"
-            help="(-correct_titles)">
+        <param name="correct_titles" type="select" label="How should PeptideShaker deal with duplicate spectra?"
+            help="Unless you suspect some input files to be genuine duplicates, 'rename spectra' is the safest option">
             <option value="0">no correction</option>
             <option value="1" selected="True">rename spectra</option>
             <option value="2">delete spectra</option>
         </param>

         <param name="mgf_splitting" type="integer" value="1000" label="The maximum mgf file size in MB before splitting the mgf"
-            help="(-mgf_splitting)"/>
+            help="Choose a smaller value if you are running on a machine with limited memory"/>
         <param name="mgf_spectrum_count" type="integer" value="25000" label="The maximum number of spectra per mgf file when splitting"
-            help="(-mgf_spectrum_count)"/>
+            help="Choose a smaller value if you are running on a machine with limited memory"/>

         <conditional name="species_type">
             <param name="species_type_selector" type="select" optional="true" label="The species type to use for the gene annotation"
-                help="(-species_type)">
+                help="(Optional) If your species is supported under one of the groups, selecting it will allow GO annotations and other information to be included in the results">
                 <option value="Fungi">Fungi</option>
                 <option value="Plants">Plants</option>
                 <option value="Metazoa">Metazoa</option>
@@ -312,7 +312,7 @@
             </param>
             <when value="Protists">
                 <param name="species" type="select" label="The species to use for the gene annotation"
-                    help="(-species)">
+                    help="">
                     <option value="Albugo laibachii">Albugo laibachii</option>
                     <option value="Dictyostelium discoideum">Dictyostelium discoideum</option>
                     <option value="Entamoeba histolytica">Entamoeba histolytica</option>
@@ -338,7 +338,7 @@
             </when>
             <when value="Plants">
                 <param name="species" type="select" label="The species to use for the gene annotation"
-                    help="(-species)">
+                    help="">
                     <option value="Arabidopsis lyrata">Arabidopsis lyrata</option>
                     <option value="Arabidopsis thaliana">Arabidopsis thaliana</option>
                     <option value="Brachypodium distachyon">Brachypodium distachyon</option>
@@ -366,7 +366,7 @@
             </when>
             <when value="Metazoa">
                 <param name="species" type="select" label="The species to use for the gene annotation"
-                    help="(-species)">
+                    help="">
                     <option value="Acyrthosiphon pisum">Acyrthosiphon pisum</option>
                     <option value="Aedes aegypti">Aedes aegypti</option>
                     <option value="Amphimedon queenslandica">Amphimedon queenslandica</option>
@@ -414,7 +414,7 @@
             </when>
             <when value="Fungi">
                 <param name="species" type="select" label="The species to use for the gene annotation"
-                    help="(-species)">
+                    help="">
                     <option value="Ashbya gossypii">Ashbya gossypii</option>
                     <option value="Aspergillus clavatus">Aspergillus clavatus</option>
                     <option value="Aspergillus flavus">Aspergillus flavus</option>
@@ -458,7 +458,7 @@
             </when>
             <when value="Vertebrates">
                 <param name="species" type="select" label="The species to use for the gene annotation"
-                    help="(-species)">
+                    help="">
                     <option value="Ailuropoda melanoleuca">Ailuropoda melanoleuca</option>
                     <option value="Anolis carolinensis">Anolis carolinensis</option>
                     <option value="Bos taurus">Bos taurus</option>
@@ -524,7 +524,7 @@
         </conditional>

         <param name="missed_cleavages" type="integer" value="2" label="Maximum Missed Cleavages"
-            help="Allow peptides to contain up to this many missed enzyme cleavage sites. 2 is the recommended value"/>
+            help="Allow peptides to contain up to this many missed enzyme cleavage sites."/>
         <param name="fixed_modifications" type="select" label="Fixed Modifications" multiple="true"
             help="Occurs in known places on peptide sequence. Hold the appropriate key while clicking to select multiple items">
             <options from_file="searchgui_mods.loc">
@@ -616,7 +616,7 @@
                 <param name="peptide_fdr" label="FDR at the peptide level" help="In percent (default 1% FDR: '1')" value="1" type="float" />
                 <param name="psm_fdr" label="FDR at the PSM level" help="In percent (default 1% FDR: '1')" value="1" type="float" />
                 <param name="protein_fraction_mw_confidence" label="Minimum confidence required for a protein in the fraction MW plot" value="" type="float"
-                    help="default 95%: '95.0' (-protein_fraction_mw_confidence)" />
+                    help="default 95%: '95.0'" />
                 <conditional name="ptm_score">
                     <param name="ptm_score_selector" type="select" label="The PTM probabilistic score to use for PTM localization">
                         <option value="0" selected="True">A-score</option>
@@ -626,7 +626,7 @@
                     <when value="1">
                         <param name="score_neutral_losses" label="Include Neutral Losses in A Score" type="boolean" truevalue="1" falsevalue="0" />
                         <param name="ptm_threshold" label="The threshold to use for the PTM scores" value="" type="float"
-                            help="Automatic mode will be used if not set." />
+                            help="Automatic mode will be used if not set" />
                     </when>
                 </conditional>
                 <!-- SKIPPING -protein_fraction_mw_confidence ${processing_options.protein_fraction_mw_confidence} -->
@@ -642,14 +642,14 @@
             <when value="yes">
                 <param name="min_peptide_length" label="Minimum Peptide Length" value="6" type="integer" />
                 <param name="max_peptide_length" label="Maximum Peptide Length" value="30" type="integer" />
-                <param name="max_precursor_error" label="Maximum Precursor Error" value="10" type="float" help="Next option specifies units (Da or ppm)." />
+                <param name="max_precursor_error" label="Maximum Precursor Error" value="10" type="float" help="Next option specifies units (Da or ppm)" />
                 <param name="max_precursor_error_type" label="Maximum Precursor Error Type" type="select">
                     <option value="0">ppm</option>
                     <option value="1">Daltons</option>
                 </param>
                 <param name="max_xtandem_e" label="Maximum X! Tandem e-value" value="100" type="float" help="" />
                 <param name="max_omssa_e" label="Maximum OMSSA e-value" value="100" type="float" help="" />
-                <param name="max_mascot_e" label="Maximum Mascot e-value filter" value="100" type="float" help="(-max_mascot_e)" />
+                <param name="max_mascot_e" label="Maximum Mascot e-value filter" value="100" type="float" help="" />
                 <param name="exclude_unknown_ptms" label="Exclude Unknown PTMs" type="boolean" truevalue="1" falsevalue="0" checked="true" />
             </when>
         </conditional>
@@ -661,7 +661,7 @@
             <option value="0">Certificate of Analysis</option>
             <option value="1">Hierarchical Report</option>
             <option value="cps">CPS file</option>
-            <validator type="no_options" message="Please select at least one output file." />
+            <validator type="no_options" message="Please select at least one output file" />
         </param>
     </inputs>
     <outputs>
@@ -708,139 +708,148 @@
 Runs multiple search engines (X! Tandem and OMSSA) on any number of MGF peak lists using the SearchGUI application and combines the results.

 https://code.google.com/p/peptide-shaker/
+
 https://code.google.com/p/searchgui/

-**Reports**
-
-*PSM Report*
+----

-Protein(s)	Protein(s) to which the peptide can be attached.
-Sequence	Sequence of the peptide.
-Variable Modifications	The variable modifications.
-D-score	D-score for variable PTM localization.
-probabilistic PTM score	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
-Localization Confidence	The confidence in variable PTM localization.
-Fixed Modifications	The fixed modifications.
-Spectrum File	The spectrum file.
-Spectrum Title	The title of the spectrum.
-Spectrum Scan Number	The spectrum scan number.
-RT	Retention time
-m/z	Measured m/z
-Measured Charge	The charge as given in the spectrum file.
-Identification Charge	The charge as inferred by the search engine.
-Theoretical Mass	The theoretical mass of the peptide.
-Isotope Number	The isotope number targetted by the instrument.
-Precursor m/z Error	The precursor m/z matching error.
-Score	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
-Confidence	Confidence in percent associated to the retained PSM.
-Decoy	Indicates whether the peptide is a decoy (1: yes, 0: no).
-Validation	Indicates the validation level of the protein group.
+Reports
+=======


-*Protein Report*
+PSM Report
+----------

-Main Accession	Main accession of the protein group.
-Description	Description of the protein designed by the main accession.
-Gene Name	The gene names of the Ensembl gene ID associated to the main accession.
-Chromosome	The chromosome of the Ensembl gene ID associated to the main accession.
-PI	Protein Inference status of the protein group.
-Secondary Accessions	Other accessions in the protein group (alphabetical order).
-Protein Group	The complete protein group (alphabetical order).
-#Peptides	Total number of peptides.
-#Validated Peptides	Number of validated peptides.
-#Unique	Total number of peptides unique to this protein group.
-#PSMs	Number of PSMs
-#Validated PSMs	Number of validated PSMs
-Coverage (%)	Sequence coverage in percent of the protein designed by the main accession.
-Possible Coverage (%)	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
-MW (kDa)	Molecular Weight.
-Spectrum Counting NSAF 	Normalized Spectrum Abundance Factor (NSAF)
-Spectrum Counting emPAI	exponentially modified Protein Abundance Index (emPAI)
-Confident Modification Sites, # Confident Modification Sites	List of the sites where a variable modification was confidently localized.
-Other Modification Sites, # Other Modification Sites	List of the non-confident sites where a variable modification was localized.
-Score	Score of the protein group.
-Confidence	Confidence in percent associated to the protein group.
-Decoy	Indicates whether the protein group is a decoy (1: yes, 0: no).
-Validation	Indicates the validation level of the protein group.
+* Protein(s):                Protein(s) to which the peptide can be attached
+* Sequence:	                Sequence of the peptide
+* Variable Modifications:	The variable modifications
+* D-score:	                D-score for variable PTM localization
+* probabilistic PTM score:	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
+* Localization Confidence:	The confidence in variable PTM localization.
+* Fixed Modifications:	    The fixed modifications.
+* Spectrum File:	The spectrum file.
+* Spectrum Title:	The title of the spectrum.
+* Spectrum Scan Number:	The spectrum scan number.
+* RT:	Retention time
+* m/z:	Measured m/z
+* Measured Charge:	The charge as given in the spectrum file.
+* Identification Charge:	The charge as inferred by the search engine.
+* Theoretical Mass:	The theoretical mass of the peptide.
+* Isotope Number:	The isotope number targeted by the instrument.
+* Precursor m/z Error:	The precursor m/z matching error.
+* Score:	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
+* Confidence:	Confidence in percent associated to the retained PSM.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation: Indicates the validation level of the protein group.


-*Peptide Report*
+Protein Report
+--------------
+
+* Main Accession:	Main accession of the protein group.
+* Description:	Description of the protein designed by the main accession.
+* Gene Name:	The gene names of the Ensembl gene ID associated to the main accession.
+* Chromosome:	The chromosome of the Ensembl gene ID associated to the main accession.
+* PI:	Protein Inference status of the protein group.
+* Secondary Accessions:	Other accessions in the protein group (alphabetical order).
+* Protein Group:	The complete protein group (alphabetical order).
+* #Peptides:	Total number of peptides.
+* #Validated Peptides:	Number of validated peptides.
+* #Unique:	Total number of peptides unique to this protein group.
+* #PSMs:	Number of PSMs
+* #Validated PSMs:	Number of validated PSMs
+* Coverage (%):	Sequence coverage in percent of the protein designed by the main accession.
+* Possible Coverage (%):	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
+* MW (kDa):	Molecular Weight.
+* Spectrum Counting NSAF: 	Normalized Spectrum Abundance Factor (NSAF)
+* Spectrum Counting emPAI:	exponentially modified Protein Abundance Index (emPAI)
+* Confident Modification Sites, Number of Confident Modification Sites:	List of the sites where a variable modification was confidently localized.
+* Other Modification Sites, Number of Other Modification Sites:	List of the non-confident sites where a variable modification was localized.
+* Score:	Score of the protein group.
+* Confidence:	Confidence in percent associated to the protein group.
+* Decoy:	Indicates whether the protein group is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+
+
+Peptide Report
+--------------


-Protein(s)	Protein(s) to which this peptide can be attached.
-AAs Before	The amino-acids before the sequence.
-Sequence	Sequence of the peptide.
-AAs After	The amino-acids after the sequence.
-Modified Sequence	The peptide sequence annotated with variable modifications.
-Variable Modifications	The variable modifications.
-Localization Confidence	The confidence in PTMs localization.
-Fixed Modifications	The fixed modifications.
-#Validated PSMs	Number of validated PSMs.
-#PSMs	Number of PSMs.
-Score	Score of the peptide.
-Confidence	Confidence in percent associated to the peptide.
-Decoy	Indicates whether the peptide is a decoy (1: yes, 0: no).
-Validation	Indicates the validation level of the protein group.
+* Protein(s):	Protein(s) to which this peptide can be attached.
+* AAs Before:	The amino-acids before the sequence.
+* Sequence:	Sequence of the peptide.
+* AAs After:	The amino-acids after the sequence.
+* Modified Sequence:	The peptide sequence annotated with variable modifications.
+* Variable Modifications:	The variable modifications.
+* Localization Confidence:	The confidence in PTMs localization.
+* Fixed Modifications:	The fixed modifications.
+* #Validated PSMs:	Number of validated PSMs.
+* #PSMs:	Number of PSMs.
+* Score:	Score of the peptide.
+* Confidence:	Confidence in percent associated to the peptide.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.


-*Hirachical Report*
+Hierarchical Report
+-------------------

-Main Accession	Main accession of the protein group.
-Description	Description of the protein designed by the main accession.
-PI	Protein Inference status of the protein group.
-Secondary Accessions	Other accessions in the protein group (alphabetical order).
-Protein Group	The complete protein group (alphabetical order).
-#Peptides	Total number of peptides.
-#Validated Peptides	Number of validated peptides.
-#Unique	Total number of peptides unique to this protein group.
-#PSMs	Number of PSMs
-#Validated PSMs	Number of validated PSMs
-Coverage (%)	Sequence coverage in percent of the protein designed by the main accession.
-Possible Coverage (%)	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
-MW (kDa)	Molecular Weight.
-Spectrum Counting NSAF 	Normalized Spectrum Abundance Factor (NSAF)
-Spectrum Counting emPAI	exponentially modified Protein Abundance Index (emPAI)
-Confident Modification Sites, # Confident Modification Sites	List of the sites where a variable modification was confidently localized.
-Other Modification Sites, # Other Modification Sites	List of the non-confident sites where a variable modification was localized.
-Score	Score of the protein group.
-Confidence	Confidence in percent associated to the protein group.
-Decoy	Indicates whether the protein group is a decoy (1: yes, 0: no).
-Validation	Indicates the validation level of the protein group.
-Protein(s)	Protein(s) to which this peptide can be attached.
-AAs Before	The amino-acids before the sequence.
-Sequence	Sequence of the peptide.
-AAs After	The amino-acids after the sequence.
-Variable Modifications	The variable modifications.
-Localization Confidence	The confidence in PTMs localization.
-Fixed Modifications	The fixed modifications.
-#Validated PSMs	Number of validated PSMs.
-#PSMs	Number of PSMs.
-Score	Score of the peptide.
-Confidence	Confidence in percent associated to the peptide.
-Decoy	Indicates whether the peptide is a decoy (1: yes, 0: no).
-Validation	Indicates the validation level of the protein group.
-Protein(s)	Protein(s) to which the peptide can be attached.
-Sequence	Sequence of the peptide.
-Modified Sequence	The peptide sequence annotated with variable modifications.
-Variable Modifications	The variable modifications.
-D-score	D-score for variable PTM localization.
-probabilistic PTM score	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
-Localization Confidence	The confidence in variable PTM localization.
-Fixed Modifications	The fixed modifications.
-Spectrum File	The spectrum file.
-Spectrum Title	The title of the spectrum.
-Spectrum Scan Number	The spectrum scan number.
-RT	Retention time
-m/z	Measured m/z
-Measured Charge	The charge as given in the spectrum file.
-Identification Charge	The charge as inferred by the search engine.
-Theoretical Mass	The theoretical mass of the peptide.
-Isotope Number	The isotope number targetted by the instrument.
-Precursor m/z Error	The precursor m/z matching error.
-Score	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
-Confidence	Confidence in percent associated to the retained PSM.
-Decoy	Indicates whether the peptide is a decoy (1: yes, 0: no).
-Validation	Indicates the validation level of the protein group.
+* Main Accession:	Main accession of the protein group.
+* Description:	Description of the protein designed by the main accession.
+* PI:	Protein Inference status of the protein group.
+* Secondary Accessions:	Other accessions in the protein group (alphabetical order).
+* Protein Group:	The complete protein group (alphabetical order).
+* #Peptides:	Total number of peptides.
+* #Validated Peptides:	Number of validated peptides.
+* #Unique:	Total number of peptides unique to this protein group.
+* #PSMs:	Number of PSMs
+* #Validated PSMs:	Number of validated PSMs
+* Coverage (%):	Sequence coverage in percent of the protein designed by the main accession.
+* Possible Coverage (%):	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
+* MW (kDa):	Molecular Weight.
+* Spectrum Counting NSAF: 	Normalized Spectrum Abundance Factor (NSAF)
+* Spectrum Counting emPAI:	exponentially modified Protein Abundance Index (emPAI)
+* Confident Modification Sites, # Confident Modification Sites:	List of the sites where a variable modification was confidently localized.
+* Other Modification Sites, # Other Modification Sites:	List of the non-confident sites where a variable modification was localized.
+* Score:	Score of the protein group.
+* Confidence:	Confidence in percent associated to the protein group.
+* Decoy:	Indicates whether the protein group is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+* Protein(s):	Protein(s) to which this peptide can be attached.
+* AAs Before:	The amino-acids before the sequence.
+* Sequence:	Sequence of the peptide.
+* AAs After:	The amino-acids after the sequence.
+* Variable Modifications:	The variable modifications.
+* Localization Confidence:	The confidence in PTMs localization.
+* Fixed Modifications:	The fixed modifications.
+* #Validated PSMs:	Number of validated PSMs.
+* #PSMs:	Number of PSMs.
+* Score:	Score of the peptide.
+* Confidence:	Confidence in percent associated to the peptide.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+* Protein(s):	Protein(s) to which the peptide can be attached.
+* Sequence:	Sequence of the peptide.
+* Modified Sequence:	The peptide sequence annotated with variable modifications.
+* Variable Modifications:	The variable modifications.
+* D-score:	D-score for variable PTM localization.
+* probabilistic PTM score:	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
+* Localization Confidence:	The confidence in variable PTM localization.
+* Fixed Modifications:	The fixed modifications.
+* Spectrum File:	The spectrum file.
+* Spectrum Title:	The title of the spectrum.
+* Spectrum Scan Number:	The spectrum scan number.
+* RT:	Retention time
+* m/z:	Measured m/z
+* Measured Charge:	The charge as given in the spectrum file.
+* Identification Charge:	The charge as inferred by the search engine.
+* Theoretical Mass:	The theoretical mass of the peptide.
+* Isotope Number:	The isotope number targeted by the instrument.
+* Precursor m/z Error:	The precursor m/z matching error.
+* Score:	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
+* Confidence:	Confidence in percent associated to the retained PSM.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.


@@ -849,7 +858,7 @@

 **Citation**

-For the underlying tool, please cite `TODO`
+To cite the underlying tools (PeptideShaker and SearchGUI) please refer to the list of papers at http://peptide-shaker.googlecode.com

 If you use this tool in Galaxy, please cite Chilton J, Ira Cooke, Bjoern Gruening et al. https://bitbucket.org/galaxyp/peptideshaker
     </help>