diff peptide_shaker.xml @ 0:8b99cb00e1c4 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/peptideshaker commit 3c83b8f8ca60d6399bc5dd7096b830ca1f1c457e-dirty
author jjohnson
date Tue, 15 May 2018 14:50:35 -0400
parents
children fa76abf69433
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/peptide_shaker.xml	Tue May 15 14:50:35 2018 -0400
@@ -0,0 +1,513 @@
+<tool id="peptide_shaker" name="Peptide Shaker" version="1.16.22.0">
+    <description>
+        Perform protein identification using various search engines based on results from SearchGUI
+    </description>
+    <macros>
+        <import>macros_basic.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="1.16.22">peptide-shaker</requirement>
+    </requirements>
+    <expand macro="stdio" />
+    <command>
+<![CDATA[
+        #from datetime import datetime
+        #set $exp_str = "Galaxy_Experiment_%s" % datetime.now().strftime("%Y%m%d%H%M%S")
+        #set $samp_str = "Sample_%s" % datetime.now().strftime("%Y%m%d%H%M%S")
+        #set $temp_stderr = "peptideshaker_stderr"
+        #set $bin_dir = "bin"
+        ## The names above end in a YYYYmmddHHMMSS timestamp (%S = seconds; lowercase %s is a non-portable epoch code).
+        ## Flow: stage the SearchGUI archive, run PeptideShakerCLI, then the optional MzidCLI / ReportCLI exports.
+        mkdir output_reports;
+        cwd=`pwd`;
+
+        ln -s '$searchgui_input' searchgui_input.zip &&
+        jar xvf searchgui_input.zip SEARCHGUI_IdentificationParameters.par &&
+        ######################
+        ## PeptideShakerCLI ##
+        ######################
+        (peptide-shaker eu.isas.peptideshaker.cmd.PeptideShakerCLI
+            --exec_dir="\$cwd/${bin_dir}"
+            -temp_folder \$cwd/PeptideShakerCLI
+            -experiment '$exp_str'
+            -sample '$samp_str'
+            -replicate 1
+            -identification_files \$cwd/searchgui_input.zip
+            -id_params \$cwd/SEARCHGUI_IdentificationParameters.par
+            -out \$cwd/peptideshaker_output.cpsx
+            -zip \$cwd/peptideshaker_output.zip
+            #set $cleaned_list = str($outputs).split(',')
+            #if 'cps' in $cleaned_list:
+                #silent $cleaned_list.remove('cps')
+            #end if
+            #if 'mzidentML' in $cleaned_list:
+                #silent $cleaned_list.remove('mzidentML')
+            #end if
+            #if 'zip' in $cleaned_list:
+                #silent $cleaned_list.remove('zip')
+            #end if
+            #if len($cleaned_list) > 0
+              ## Only numbers are left over. These correspond to the different reports.
+              -reports #echo ','.join($cleaned_list)#
+            #end if
+
+            -threads "\${GALAXY_SLOTS:-12}"
+
+
+
+        2>> $temp_stderr)
+
+        &&
+
+        echo "Running Reports";
+
+        ##################################
+        ## PeptideShaker Report options ##
+        ##################################
+
+        #if 'mzidentML' in str($outputs).split(','):
+            echo "Generating mzIdentML";
+            (peptide-shaker eu.isas.peptideshaker.cmd.MzidCLI
+                --exec_dir="\$cwd/${bin_dir}"
+                -in \$cwd/peptideshaker_output.zip
+                -output_file \$cwd/output.mzid
+                #if $contact_options.contact_options_selector == "yes":
+                    -contact_first_name "$contact_options.contact_first_name"
+                    -contact_last_name "$contact_options.contact_last_name"
+                    -contact_email "$contact_options.contact_email"
+                    -contact_address "$contact_options.contact_address"
+                    #if str($contact_options.contact_url).strip() != '':
+                        -contact_url "$contact_options.contact_url"
+                    #end if
+                    -organization_name "$contact_options.organization_name"
+                    -organization_email "$contact_options.organization_email"
+                    -organization_address "$contact_options.organization_address"
+                    #if str($contact_options.organization_url).strip() != '':
+                        -organization_url "$contact_options.organization_url"
+                    #end if
+                #else:
+                    -contact_first_name "Proteomics"
+                    -contact_last_name "Galaxy"
+                    -contact_email "galaxyp@umn.edu"
+                    -contact_address "galaxyp@umn.edu"
+                    -organization_name "University of Minnesota"
+                    -organization_email "galaxyp@umn.edu"
+                    -organization_address "Minneapolis, MN 55455, Vereinigte Staaten"
+                #end if
+            2>> $temp_stderr)
+            &&
+        #end if
+
+        ## Generate Reports if the user has selected at least one of the numbered reports (0-8).
+        ## 'cps', 'mzidentML' and 'zip' are not valid report ids for PeptideShaker
+        ## and will not be passed to the command line
+        #if set(["0","1","2","3","4","5","6","7","8"]).intersection( set( str( $outputs ).split(',') ) ):
+
+            (peptide-shaker eu.isas.peptideshaker.cmd.ReportCLI
+                --exec_dir="\$cwd/${bin_dir}"
+                -temp_folder \$cwd/ReportCLI
+                -in \$cwd/peptideshaker_output.zip
+                -out_reports \$cwd/output_reports
+                #set $cleaned_list = str($outputs).split(',')
+                #if 'cps' in $cleaned_list:
+                    #silent $cleaned_list.remove('cps')
+                #end if
+                #if 'mzidentML' in $cleaned_list:
+                    #silent $cleaned_list.remove('mzidentML')
+                #end if
+                #if 'zip' in $cleaned_list:
+                    #silent $cleaned_list.remove('zip')
+                #end if
+                ## Only numbers are left over. These correspond to the different reports.
+                -reports #echo ','.join($cleaned_list)#
+
+            2>> $temp_stderr)
+            &&
+        #end if
+
+        ## # ls -l \$cwd/output_reports/* ;
+        ## '*PSM_Report*' also matches an Extended_PSM_Report file name, so report '8' is renamed before report '3'.
+        #if '0' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*Certificate_of_Analysis*' -exec bash -c 'mv "$0" "certificate.txt"' {} \;
+            ;
+        #end if
+        #if '1' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*Hierarchical*' -exec bash -c 'mv "$0" "hierarchical.txt"' {} \;
+            ;
+        #end if
+        #if '2' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*PSM_Phosphorylation_Report*' -exec bash -c 'mv "$0" "psm_phospho.txt"' {} \;
+            ;
+        #end if
+        #if '8' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*Extended_PSM_Report*' -exec bash -c 'mv "$0" "psmx.txt"' {} \;
+            ;
+        #end if
+        #if '3' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*PSM_Report*' -exec bash -c 'mv "$0" "psm.txt"' {} \;
+            ;
+        #end if
+        #if '4' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*Peptide_Phosphorylation_Report*' -exec bash -c 'mv "$0" "peptides_phospho.txt"' {} \;
+            ;
+        #end if
+        #if '5' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*Peptide_Report*' -exec bash -c 'mv "$0" "peptides.txt"' {} \;
+            ;
+        #end if
+        #if '6' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*Protein_Phosphorylation_Report*' -exec bash -c 'mv "$0" "proteins_phospho.txt"' {} \;
+            ;
+        #end if
+        #if '7' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*Protein_Report*' -exec bash -c 'mv "$0" "proteins.txt"' {} \;
+            ;
+        #end if
+        ## NOTE(review): the status captured below is that of the last command executed above, which may be a find rather than PeptideShaker - confirm.
+        exit_code_for_galaxy=\$?;
+        cat $temp_stderr 2>&1;
+        (exit \$exit_code_for_galaxy)
+
+]]>
+    </command>
+    <inputs>
+        <param name="searchgui_input" format="searchgui_archive" type="data" label="Compressed SearchGUI results"
+            help="SearchGUI Results from History">
+            <options options_filter_attribute="metadata.searchgui_major_version" >
+                <filter type="add_value" value="@SEARCHGUI_MAJOR_VERSION@" />
+            </options>
+        </param>
+
+        <!-- Contact details passed to the mzIdentML export; when "no" is selected the command uses built-in GalaxyP values -->
+        <conditional name="contact_options">
+            <param name="contact_options_selector" type="select" label="Specify Contact Information for mzIdentML"
+                help="Create a Galaxy workflow to save these values">
+                <option value="no" selected="True">GalaxyP Project contact (Not suitable for PRIDE submission)</option>
+                <option value="yes">Specify Contact Information</option>
+            </param>
+            <when value="no" />
+            <when value="yes">
+                <param name="contact_first_name" type="text" value="" label="Contact first name.">
+                    <validator type="regex" message="Please enter a contact first name">\S+.*</validator>
+                </param>
+                <param name="contact_last_name" type="text" value="" label="Contact last name.">
+                    <validator type="regex" message="Please enter a contact last name">\S+.*</validator>
+                </param>
+                <param name="contact_email" type="text" value="" label="Contact e-mail.">
+                    <validator type="regex" message="Please enter a valid contact e-mail address">\S+@\S+</validator>
+                </param>
+                <param name="contact_address" type="text" value="" label="Contact address.">
+                    <validator type="regex" message="Please enter a contact address">\S+.*</validator>
+                </param>
+                <param name="contact_url" type="text" value="" optional="true" label="Contact URL.">
+                </param>
+                <param name="organization_name" type="text" value="" label="Organization name.">
+                    <validator type="regex" message="Please enter an organization name">\S+.*</validator>
+                </param>
+                <param name="organization_email" type="text" value="" label="Organization e-mail.">
+                    <validator type="regex" message="Please enter a valid organization e-mail address">\S+@\S+</validator>
+                </param>
+                <param name="organization_address" type="text" value="" label="Organization address.">
+                    <validator type="regex" message="Please enter an organization address">\S+.*</validator>
+                </param>
+                <param name="organization_url" type="text" value="" optional="true" label="Organization URL.">
+                </param>
+            </when>
+        </conditional>
+        <!-- NOTE(review): include_sequences is not referenced by the command template; confirm whether it should be passed to MzidCLI -->
+        <param name="include_sequences" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Include the protein sequences in mzIdentML" />
+        <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output options">
+            <option value="zip">Zip File for import to Desktop App</option>
+            <option value="mzidentML" selected="True">mzidentML File</option>
+            <option value="3">PSM Report</option>
+            <option value="8">Extended PSM Report</option>
+            <option value="2">PSM Phosphorylation Report</option>
+            <option value="5">Peptide Report</option>
+            <option value="4">Peptide Phosphorylation Report</option>
+            <option value="7">Protein Report</option>
+            <option value="6">Protein Phosphorylation Report</option>
+            <option value="0">Certificate of Analysis</option>
+            <option value="1">Hierarchical Report</option>
+            <option value="cps">CPS file</option>
+            <validator type="no_options" message="Please select at least one output file" />
+        </param>
+
+    </inputs>
+    <outputs> <!-- one dataset per choice of the "outputs" select; each filter tests for that choice's option value -->
+        <data name="mzidentML" format="mzid" from_work_dir="output.mzid" label="${tool.name} on ${on_string}: mzidentML file">
+            <filter>'mzidentML' in outputs</filter>
+        </data>
+        <data name="output_cps" format="peptideshaker_archive" from_work_dir="peptideshaker_output.cpsx" label="${tool.name} on ${on_string}: CPS file">
+            <filter>'cps' in outputs</filter>
+        </data>
+        <data name="output_zip" format="zip" from_work_dir="peptideshaker_output.zip" label="${tool.name} on ${on_string}: Archive">
+            <filter>'zip' in outputs</filter>
+        </data>
+        <data name="output_certificate" format="txt" from_work_dir="certificate.txt" label="${tool.name} on ${on_string}: Parameters">
+            <filter>'0' in outputs</filter>
+        </data>
+        <data name="output_hierarchical" format="tabular" from_work_dir="hierarchical.txt" label="${tool.name} on ${on_string}: Hierarchical Report">
+            <filter>'1' in outputs</filter>
+        </data>
+        <data name="output_psm_phosphorylation" format="tabular" from_work_dir="psm_phospho.txt" label="${tool.name} on ${on_string}: PSM Phosphorylation Report">
+            <filter>'2' in outputs</filter>
+        </data>
+        <data name="output_psm" format="tabular" from_work_dir="psm.txt" label="${tool.name} on ${on_string}: PSM Report">
+            <filter>'3' in outputs</filter>
+        </data>
+        <data name="output_extended_psm" format="tabular" from_work_dir="psmx.txt" label="${tool.name} on ${on_string}: Extended PSM Report">
+            <filter>'8' in outputs</filter>
+        </data>
+        <data name="output_peptides_phosphorylation" format="tabular" from_work_dir="peptides_phospho.txt" label="${tool.name} on ${on_string}: Peptide Phosphorylation Report">
+            <filter>'4' in outputs</filter>
+        </data>
+        <data name="output_peptides" format="tabular" from_work_dir="peptides.txt" label="${tool.name} on ${on_string}: Peptide Report">
+            <filter>'5' in outputs</filter>
+        </data>
+        <data name="output_proteins_phosphorylation" format="tabular" from_work_dir="proteins_phospho.txt" label="${tool.name} on ${on_string}: Protein Phosphorylation Report">
+            <filter>'6' in outputs</filter>
+        </data>
+        <data name="output_proteins" format="tabular" from_work_dir="proteins.txt" label="${tool.name} on ${on_string}: Protein Report">
+            <filter>'7' in outputs</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test> <!-- requests the zip archive and the PSM report (outputs "zip,3") -->
+            <param name="searchgui_input" value="tiny_searchgui_result1.zip" ftype="searchgui_archive"/>
+            <param name="processing_options_selector" value="no"/> <!-- NOTE(review): not declared in this tool's inputs; confirm it is stale -->
+            <param name="filtering_options_selector" value="no"/> <!-- NOTE(review): not declared in this tool's inputs; confirm it is stale -->
+            <param name="outputs" value="zip,3"/>
+            <output name="output_zip" file="peptide_shaker_result1.zip" ftype="zip" compare="sim_size" delta="3000" />
+            <output name="output_psm">
+                <assert_contents>
+                    <has_text text="cds.comp41779_c0_seq1" />
+                </assert_contents>
+            </output>
+        </test>
+        <test> <!-- requests reports 0-7 and checks marker text in each resulting dataset -->
+            <param name="searchgui_input" value="tiny_searchgui_result1.zip" ftype="searchgui_archive"/>
+            <param name="processing_options_selector" value="no"/> <!-- NOTE(review): not declared in this tool's inputs; confirm it is stale -->
+            <param name="filtering_options_selector" value="yes"/> <!-- NOTE(review): not declared in this tool's inputs; confirm it is stale -->
+            <param name="min_peptide_length" value="1"/> <!-- NOTE(review): not declared in this tool's inputs; confirm it is stale -->
+            <param name="outputs" value="0,1,2,3,4,5,6,7"/>
+            <output name="output_certificate">
+                <assert_contents>
+                    <has_text text="Tolerance: 100" />
+                    <has_text text="Carbamidomethylation of C" />
+                </assert_contents>
+            </output>
+            <output name="output_hierarchical">
+                <assert_contents>
+                    <has_text_matching expression="1.1\tcds.comp" />
+                </assert_contents>
+            </output>
+            <output name="output_psm">
+                <assert_contents>
+                    <not_has_text text="Phosphosite" />
+                    <has_text text="cds.comp41779_c0_seq1" />
+                </assert_contents>
+            </output>
+            <output name="output_psm_phosphorylation">
+                <assert_contents>
+                    <has_text text="Phosphosite" />
+                    <has_text text="cds.comp41779_c0_seq1" />
+                </assert_contents>
+            </output>
+            <output name="output_peptides">
+                <assert_contents>
+                    <has_text text="Modification" />
+                    <has_text text="cds.comp41779_c0_seq1" />
+                </assert_contents>
+            </output>
+            <output name="output_peptides_phosphorylation">
+                <assert_contents>
+                    <has_text text="Phosphosite" />
+                    <has_text text="cds.comp41779_c0_seq1" />
+                </assert_contents>
+            </output>
+            <output name="output_proteins">
+                <assert_contents>
+                    <has_text text="Modification" />
+                    <has_text text="cds.comp41779_c0_seq1" />
+                </assert_contents>
+            </output>
+            <output name="output_proteins_phosphorylation">
+                <assert_contents>
+                    <has_text text="Phosphosite" />
+                    <has_text text="cds.comp41779_c0_seq1" />
+                </assert_contents>
+            </output>
+        </test>
+        <test> <!-- requests the PSM report and the CPS file (outputs "3,cps") -->
+            <param name="searchgui_input" value="tiny_searchgui_result1.zip" ftype="searchgui_archive"/>
+            <param name="processing_options_selector" value="no"/> <!-- NOTE(review): not declared in this tool's inputs; confirm it is stale -->
+            <param name="filtering_options_selector" value="no"/> <!-- NOTE(review): not declared in this tool's inputs; confirm it is stale -->
+            <param name="outputs" value="3,cps"/>
+            <output name="output_cps" file="peptide_shaker_result1.cpsx" ftype="peptideshaker_archive" compare="sim_size" delta="3000"/>
+            <output name="output_psm">
+                <assert_contents>
+                    <has_text text="cds.comp41779_c0_seq1" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Combines search results given by SearchGUI after running multiple search engines (X! Tandem, OMSSA and MS-GF+) on any number of MGF peak lists.
+
+http://compomics.github.io/projects/peptide-shaker.html
+
+http://compomics.github.io/projects/searchgui.html
+
+----
+
+Reports
+=======
+
+
+PSM Report
+----------
+
+* Protein(s):                Protein(s) to which the peptide can be attached
+* Sequence:                  Sequence of the peptide
+* Variable Modifications:   The variable modifications
+* D-score:	                D-score for variable PTM localization
+* probabilistic PTM score:	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
+* Localization Confidence:	The confidence in variable PTM localization.
+* Fixed Modifications:	    The fixed modifications.
+* Spectrum File:	The spectrum file.
+* Spectrum Title:	The title of the spectrum.
+* Spectrum Scan Number:	The spectrum scan number.
+* RT:	Retention time
+* m/z:	Measured m/z
+* Measured Charge:	The charge as given in the spectrum file.
+* Identification Charge:	The charge as inferred by the search engine.
+* Theoretical Mass:	The theoretical mass of the peptide.
+* Isotope Number:	The isotope number targeted by the instrument.
+* Precursor m/z Error:	The precursor m/z matching error.
+* Score:	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
+* Confidence:	Confidence in percent associated to the retained PSM.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation: Indicates the validation level of the protein group.
+
+
+Protein Report
+--------------
+
+* Main Accession:	Main accession of the protein group.
+* Description:	Description of the protein designed by the main accession.
+* Gene Name:	The gene names of the Ensembl gene ID associated to the main accession.
+* Chromosome:	The chromosome of the Ensembl gene ID associated to the main accession.
+* PI:	Protein Inference status of the protein group.
+* Secondary Accessions:	Other accessions in the protein group (alphabetical order).
+* Protein Group:	The complete protein group (alphabetical order).
+* #Peptides:	Total number of peptides.
+* #Validated Peptides:	Number of validated peptides.
+* #Unique:	Total number of peptides unique to this protein group.
+* #PSMs:	Number of PSMs
+* #Validated PSMs:	Number of validated PSMs
+* Coverage (%):	Sequence coverage in percent of the protein designed by the main accession.
+* Possible Coverage (%):	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
+* MW (kDa):	Molecular Weight.
+* Spectrum Counting NSAF: 	Normalized Spectrum Abundance Factor (NSAF)
+* Spectrum Counting emPAI:	exponentially modified Protein Abundance Index (emPAI)
+* Confident Modification Sites: Number of Confident Modification Sites	List of the sites where a variable modification was confidently localized.
+* Other Modification Sites: Number of other Modification Sites	List of the non-confident sites where a variable modification was localized.
+* Score:	Score of the protein group.
+* Confidence:	Confidence in percent associated to the protein group.
+* Decoy:	Indicates whether the protein group is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+
+
+Peptide Report
+--------------
+
+
+* Protein(s):	Protein(s) to which this peptide can be attached.
+* AAs Before:	The amino-acids before the sequence.
+* Sequence:	Sequence of the peptide.
+* AAs After:	The amino-acids after the sequence.
+* Modified Sequence:	The peptide sequence annotated with variable modifications.
+* Variable Modifications:	The variable modifications.
+* Localization Confidence:	The confidence in PTMs localization.
+* Fixed Modifications:	The fixed modifications.
+* #Validated PSMs:	Number of validated PSMs.
+* #PSMs:	Number of PSMs.
+* Score:	Score of the peptide.
+* Confidence:	Confidence in percent associated to the peptide.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+
+
+Hierarchical Report
+-------------------
+
+* Main Accession:	Main accession of the protein group.
+* Description:	Description of the protein designed by the main accession.
+* PI:	Protein Inference status of the protein group.
+* Secondary Accessions:	Other accessions in the protein group (alphabetical order).
+* Protein Group:	The complete protein group (alphabetical order).
+* #Peptides:	Total number of peptides.
+* #Validated Peptides:	Number of validated peptides.
+* #Unique:	Total number of peptides unique to this protein group.
+* #PSMs:	Number of PSMs
+* #Validated PSMs:	Number of validated PSMs
+* Coverage (%):	Sequence coverage in percent of the protein designed by the main accession.
+* Possible Coverage (%):	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
+* MW (kDa):	Molecular Weight.
+* Spectrum Counting NSAF: 	Normalized Spectrum Abundance Factor (NSAF)
+* Spectrum Counting emPAI:	exponentially modified Protein Abundance Index (emPAI)
+* Confident Modification Sites: # Confident Modification Sites	List of the sites where a variable modification was confidently localized.
+* Other Modification Sites: # Other Modification Sites	List of the non-confident sites where a variable modification was localized.
+* Score:	Score of the protein group.
+* Confidence:	Confidence in percent associated to the protein group.
+* Decoy:	Indicates whether the protein group is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+* Protein(s):	Protein(s) to which this peptide can be attached.
+* AAs Before:	The amino-acids before the sequence.
+* Sequence:	Sequence of the peptide.
+* AAs After:	The amino-acids after the sequence.
+* Variable Modifications:	The variable modifications.
+* Localization Confidence:	The confidence in PTMs localization.
+* Fixed Modifications:	The fixed modifications.
+* #Validated PSMs:	Number of validated PSMs.
+* #PSMs:	Number of PSMs.
+* Score:	Score of the peptide.
+* Confidence:	Confidence in percent associated to the peptide.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+* Protein(s):	Protein(s) to which the peptide can be attached.
+* Sequence:	Sequence of the peptide.
+* Modified Sequence:	The peptide sequence annotated with variable modifications.
+* Variable Modifications:	The variable modifications.
+* D-score:	D-score for variable PTM localization.
+* probabilistic PTM score:	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
+* Localization Confidence:	The confidence in variable PTM localization.
+* Fixed Modifications:	The fixed modifications.
+* Spectrum File:	The spectrum file.
+* Spectrum Title:	The title of the spectrum.
+* Spectrum Scan Number:	The spectrum scan number.
+* RT:	Retention time
+* m/z:	Measured m/z
+* Measured Charge:	The charge as given in the spectrum file.
+* Identification Charge:	The charge as inferred by the search engine.
+* Theoretical Mass:	The theoretical mass of the peptide.
+* Isotope Number:	The isotope number targeted by the instrument.
+* Precursor m/z Error:	The precursor m/z matching error.
+* Score:	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
+* Confidence:	Confidence in percent associated to the retained PSM.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+
+
+------
+
+**Citation**
+
+To cite the underlying tools (PeptideShaker and SearchGUI) please refer to the list of papers at http://compomics.github.io
+
+If you use this tool in Galaxy, please cite Chilton J, Ira Cooke, Bjoern Gruening et al.
+    </help>
+    <expand macro="citations" />
+</tool>