Mercurial > repos > jjohnson > peptideshaker
diff peptide_shaker.xml @ 0:8b99cb00e1c4 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/peptideshaker commit 3c83b8f8ca60d6399bc5dd7096b830ca1f1c457e-dirty
author | jjohnson |
---|---|
date | Tue, 15 May 2018 14:50:35 -0400 |
parents | |
children | fa76abf69433 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peptide_shaker.xml Tue May 15 14:50:35 2018 -0400 @@ -0,0 +1,513 @@ +<tool id="peptide_shaker" name="Peptide Shaker" version="1.16.22.0"> + <description> + Perform protein identification using various search engines based on results from SearchGUI + </description> + <macros> + <import>macros_basic.xml</import> + </macros> + <requirements> + <requirement type="package" version="1.16.22">peptide-shaker</requirement> + </requirements> + <expand macro="stdio" /> + <command> +<![CDATA[ + #from datetime import datetime + #set $exp_str = "Galaxy_Experiment_%s" % datetime.now().strftime("%Y%m%d%H%M%s") + #set $samp_str = "Sample_%s" % datetime.now().strftime("%Y%m%d%H%M%s") + #set $temp_stderr = "peptideshaker_stderr" + #set $bin_dir = "bin" + + + mkdir output_reports; + cwd=`pwd`; + + ln -s '$searchgui_input' searchgui_input.zip && + jar xvf searchgui_input.zip SEARCHGUI_IdentificationParameters.par && + ###################### + ## PeptideShakerCLI ## + ###################### + (peptide-shaker eu.isas.peptideshaker.cmd.PeptideShakerCLI + --exec_dir="\$cwd/${bin_dir}" + -temp_folder \$cwd/PeptideShakerCLI + -experiment '$exp_str' + -sample '$samp_str' + -replicate 1 + -identification_files \$cwd/searchgui_input.zip + -id_params \$cwd/SEARCHGUI_IdentificationParameters.par + -out \$cwd/peptideshaker_output.cpsx + -zip \$cwd/peptideshaker_output.zip + #set $cleaned_list = str($outputs).split(',') + #if 'cps' in $cleaned_list: + #silent $cleaned_list.remove('cps') + #end if + #if 'mzidentML' in $cleaned_list: + #silent $cleaned_list.remove('mzidentML') + #end if + #if 'zip' in $cleaned_list: + #silent $cleaned_list.remove('zip') + #end if + #if len($cleaned_list) > 0 + ## Only numbers are left over. These corresponds to different reports. + -reports #echo ','.join($cleaned_list)# + #end if + + -threads "\${GALAXY_SLOTS:-12}" + + + + 2>> $temp_stderr) + + && + + echo "Running Reports"; + + ################################## + ## PeptideShaker Report options ## + ################################## + + #if 'mzidentML' in str($outputs).split(','): + echo "Generating mzIdentML"; + (peptide-shaker eu.isas.peptideshaker.cmd.MzidCLI + --exec_dir="\$cwd/${bin_dir}" + -in \$cwd/peptideshaker_output.zip + -output_file \$cwd/output.mzid + #if $contact_options.contact_options_selector == "yes": + -contact_first_name "$contact_options.contact_first_name" + -contact_last_name "$contact_options.contact_last_name" + -contact_email "$contact_options.contact_email" + -contact_address "$contact_options.contact_address" + #if str($contact_options.contact_url).strip() != '': + -contact_url = "$contact_options.contact_url" + #end if + -organization_name "$contact_options.organization_name" + -organization_email "$contact_options.organization_email" + -organization_address "$contact_options.organization_address" + #if str($contact_options.organization_url).strip() != '': + -organization_url = "$contact_options.organization_url" + #end if + #else: + -contact_first_name "Proteomics" + -contact_last_name "Galaxy" + -contact_email "galaxyp@umn.edu" + -contact_address "galaxyp@umn.edu" + -organization_name "University of Minnesota" + -organization_email "galaxyp@umn.edu" + -organization_address "Minneapolis, MN 55455, Vereinigte Staaten" + #end if + 2>> $temp_stderr) + && + #end if + + ## Generate Reports if the user has selected one of the 8 additional reports + ## 'cps', 'mzidentML' and 'zip' are not valid options for PeptideShaker + ## and will not be passed to the command line + #if set(["0","1","2","3","4","5","6","7"]).intersection( set( str( $outputs ).split(',') ) ): + + (peptide-shaker eu.isas.peptideshaker.cmd.ReportCLI + --exec_dir="\$cwd/${bin_dir}" + -temp_folder \$cwd/ReportCLI + -in \$cwd/peptideshaker_output.zip + -out_reports \$cwd/output_reports + #set $cleaned_list = str($outputs).split(',') + #if 'cps' in $cleaned_list: + #silent $cleaned_list.remove('cps') + #end if + #if 'mzidentML' in $cleaned_list: + #silent $cleaned_list.remove('mzidentML') + #end if + #if 'zip' in $cleaned_list: + #silent $cleaned_list.remove('zip') + #end if + ## Only numbers are left over. These corresponds to different reports. + -reports #echo ','.join($cleaned_list)# + + 2>> $temp_stderr) + && + #end if + + ## # ls -l \$cwd/output_reports/* ; + + #if '0' in str($outputs).split(','): + find \$cwd/output_reports -name '*Certificate_of_Analysis*' -exec bash -c 'mv "$0" "certificate.txt"' {} \; + ; + #end if + #if '1' in str($outputs).split(','): + find \$cwd/output_reports -name '*Hierarchical*' -exec bash -c 'mv "$0" "hierarchical.txt"' {} \; + ; + #end if + #if '2' in str($outputs).split(','): + find \$cwd/output_reports -name '*PSM_Phosphorylation_Report*' -exec bash -c 'mv "$0" "psm_phospho.txt"' {} \; + ; + #end if + #if '8' in str($outputs).split(','): + find \$cwd/output_reports -name '*Extended_PSM_Report*' -exec bash -c 'mv "$0" "psmx.txt"' {} \; + ; + #end if + #if '3' in str($outputs).split(','): + find \$cwd/output_reports -name '*PSM_Report*' -exec bash -c 'mv "$0" "psm.txt"' {} \; + ; + #end if + #if '4' in str($outputs).split(','): + find \$cwd/output_reports -name '*Peptide_Phosphorylation_Report*' -exec bash -c 'mv "$0" "peptides_phospho.txt"' {} \; + ; + #end if + #if '5' in str($outputs).split(','): + find \$cwd/output_reports -name '*Peptide_Report*' -exec bash -c 'mv "$0" "peptides.txt"' {} \; + ; + #end if + #if '6' in str($outputs).split(','): + find \$cwd/output_reports -name '*Protein_Phosphorylation_Report*' -exec bash -c 'mv "$0" "proteins_phospho.txt"' {} \; + ; + #end if + #if '7' in str($outputs).split(','): + find \$cwd/output_reports -name '*Protein_Report*' -exec bash -c 'mv "$0" "proteins.txt"' {} \; + ; + #end if + + exit_code_for_galaxy=\$?; + cat $temp_stderr 2>&1; + (exit \$exit_code_for_galaxy) + +]]> + </command> + <inputs> + <param name="searchgui_input" format="searchgui_archive" type="data" label="Compressed SearchGUI results" + help="SearchGUI Results from History"> + <options options_filter_attribute="metadata.searchgui_major_version" > + <filter type="add_value" value="@SEARCHGUI_MAJOR_VERSION@" /> + </options> + </param> + + + <conditional name="contact_options"> + <param name="contact_options_selector" type="select" label="Specify Contact Information for mzIdendML" + help="Create a Galaxy workflow to save these values"> + <option value="no" selected="True">GalaxyP Project contact (Not suitable for PRIDE submission)</option> + <option value="yes">Specify Contact Information</option> + </param> + <when value="no" /> + <when value="yes"> + <param name="contact_first_name" type="text" value="" label="Contact first name."> + <validator type="regex" message="">\S+.*</validator> + </param> + <param name="contact_last_name" type="text" value="" label="Contact last name."> + <validator type="regex" message="">\S+.*</validator> + </param> + <param name="contact_email" type="text" value="" label="Contact e-mail."> + <validator type="regex" message="">\S+@\S+</validator> + </param> + <param name="contact_address" type="text" value="" label="Contact address."> + <validator type="regex" message="">\S+.*</validator> + </param> + <param name="contact_url" type="text" value="" optional="true" label="Contact URL."> + </param> + <param name="organization_name" type="text" value="" label="Organization name."> + <validator type="regex" message="">\S+.*</validator> + </param> + <param name="organization_email" type="text" value="" label="Organization e-mail."> + <validator type="regex" message="">\S+@\S+</validator> + </param> + <param name="organization_address" type="text" value="" label="Organization address."> + <validator type="regex" message="">\S+.*</validator> + </param> + <param name="organization_url" type="text" value="" optional="true" label="Organization URL."> + </param> + </when> + </conditional> + + <param name="include_sequences" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Include the protein sequences in mzIdentML" /> + <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output options"> + <option value="zip">Zip File for import to Desktop App</option> + <option value="mzidentML" selected="True">mzidentML File</option> + <option value="3">PSM Report</option> + <option value="8">Extended PSM Report</option> + <option value="2">PSM Phosphorylation Report</option> + <option value="5">Peptide Report</option> + <option value="4">Peptide Phosphorylation Report</option> + <option value="7">Protein Report</option> + <option value="6">Protein Phosphorylation Report</option> + <option value="0">Certificate of Analysis</option> + <option value="1">Hierarchical Report</option> + <option value="cps">CPS file</option> + <validator type="no_options" message="Please select at least one output file" /> + </param> + + </inputs> + <outputs> + <data format="mzid" name="mzidentML" from_work_dir="output.mzid" label="${tool.name} on ${on_string}: mzidentML file"> + <filter>'mzidentML' in outputs</filter> + </data> + <data format="peptideshaker_archive" name="output_cps" from_work_dir="peptideshaker_output.cpsx" label="${tool.name} on ${on_string}: CPS file"> + <filter>'cps' in outputs</filter> + </data> + <data format="zip" name="output_zip" from_work_dir="peptideshaker_output.zip" label="${tool.name} on ${on_string}: Archive"> + <filter>'zip' in outputs</filter> + </data> + <data format="txt" name="output_certificate" from_work_dir="certificate.txt" label="${tool.name} on ${on_string}: Parameters"> + <filter>'0' in outputs</filter> + </data> + <data format="tabular" name="output_hierarchical" from_work_dir="hierarchical.txt" label="${tool.name} on ${on_string}: Hierarchical Report"> + <filter>'1' in outputs</filter> + </data> + <data format="tabular" name="output_psm_phosphorylation" from_work_dir="psm_phospho.txt" label="${tool.name} on ${on_string}: PSM Phosphorylation Report"> + <filter>'2' in outputs</filter> + </data> + <data format="tabular" name="output_psm" from_work_dir="psm.txt" label="${tool.name} on ${on_string}: PSM Report"> + <filter>'3' in outputs</filter> + </data> + <data format="tabular" name="output_extended_psm" from_work_dir="psmx.txt" label="${tool.name} on ${on_string}: Extended PSM Report"> + <filter>'8' in outputs</filter> + </data> + <data format="tabular" name="output_peptides_phosphorylation" from_work_dir="peptides_phospho.txt" label="${tool.name} on ${on_string}: Peptide Phosphorylation Report"> + <filter>'4' in outputs</filter> + </data> + <data format="tabular" name="output_peptides" from_work_dir="peptides.txt" label="${tool.name} on ${on_string}: Peptide Report"> + <filter>'5' in outputs</filter> + </data> + <data format="tabular" name="output_proteins_phosphorylation" from_work_dir="proteins_phospho.txt" label="${tool.name} on ${on_string}: Protein Phosphorylation Report"> + <filter>'6' in outputs</filter> + </data> + <data format="tabular" name="output_proteins" from_work_dir="proteins.txt" label="${tool.name} on ${on_string}: Protein Report"> + <filter>'7' in outputs</filter> + </data> + </outputs> + <tests> + <test> + <param name="searchgui_input" value="tiny_searchgui_result1.zip" ftype="searchgui_archive"/> + <param name="processing_options_selector" value="no"/> + <param name="filtering_options_selector" value="no"/> + <param name="outputs" value="zip,3"/> + <output name="output_zip" file="peptide_shaker_result1.zip" ftype="zip" compare="sim_size" delta="3000" /> + <output name="output_psm"> + <assert_contents> + <has_text text="cds.comp41779_c0_seq1" /> + </assert_contents> + </output> + </test> + <test> + <param name="searchgui_input" value="tiny_searchgui_result1.zip" ftype="searchgui_archive"/> + <param name="processing_options_selector" value="no"/> + <param name="filtering_options_selector" value="yes"/> + <param name="min_peptide_length" value="1"/> + <param name="outputs" value="0,1,2,3,4,5,6,7"/> + <output name="output_certificate"> + <assert_contents> + <has_text text="Tolerance: 100" /> + <has_text text="Carbamidomethylation of C" /> + </assert_contents> + </output> + <output name="output_hierarchical"> + <assert_contents> + <has_text_matching expression="1.1\tcds.comp" /> + </assert_contents> + </output> + <output name="output_psm"> + <assert_contents> + <not_has_text text="Phosphosite" /> + <has_text text="cds.comp41779_c0_seq1" /> + </assert_contents> + </output> + <output name="output_psm_phosphorylation"> + <assert_contents> + <has_text text="Phosphosite" /> + <has_text text="cds.comp41779_c0_seq1" /> + </assert_contents> + </output> + <output name="output_peptides"> + <assert_contents> + <has_text text="Modification" /> + <has_text text="cds.comp41779_c0_seq1" /> + </assert_contents> + </output> + <output name="output_peptides_phosphorylation"> + <assert_contents> + <has_text text="Phosphosite" /> + <has_text text="cds.comp41779_c0_seq1" /> + </assert_contents> + </output> + <output name="output_proteins"> + <assert_contents> + <has_text text="Modification" /> + <has_text text="cds.comp41779_c0_seq1" /> + </assert_contents> + </output> + <output name="output_proteins_phosphorylation"> + <assert_contents> + <has_text text="Phosphosite" /> + <has_text text="cds.comp41779_c0_seq1" /> + </assert_contents> + </output> + </test> + <test> + <param name="searchgui_input" value="tiny_searchgui_result1.zip" ftype="searchgui_archive"/> + <param name="processing_options_selector" value="no"/> + <param name="filtering_options_selector" value="no"/> + <param name="outputs" value="3,cps"/> + <output name="output_cps" file="peptide_shaker_result1.cpsx" ftype="peptideshaker_archive" compare="sim_size" delta="3000"/> + <output name="output_psm"> + <assert_contents> + <has_text text="cds.comp41779_c0_seq1" /> + </assert_contents> + </output> + </test> + </tests> + <help> +**What it does** + +Combines search results given by SearchGUI after running multiple search engines (X! Tandem, OMSSA and MS-GF+) on any number of MGF peak lists. + +http://compomics.github.io/projects/peptide-shaker.html + +http://compomics.github.io/projects/searchgui.html + +---- + +Reports +======= + + +PSM Report +---------- + +* Protein(s): Protein(s) to which the peptide can be attached +* Sequence: Sequence of the peptide +* Variable Modifications: The variable modifications +* D-score: D-score for variable PTM localization +* probabilistic PTM score: The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization. +* Localization Confidence: The confidence in variable PTM localization. +* Fixed Modifications: The fixed modifications. +* Spectrum File: The spectrum file. +* Spectrum Title: The title of the spectrum. +* Spectrum Scan Number: The spectrum scan number. +* RT: Retention time +* m/z: Measured m/z +* Measured Charge: The charge as given in the spectrum file. +* Identification Charge: The charge as inferred by the search engine. +* Theoretical Mass: The theoretical mass of the peptide. +* Isotope Number: The isotope number targetted by the instrument. +* Precursor m/z Error: The precursor m/z matching error. +* Score: Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs). +* Confidence: Confidence in percent associated to the retained PSM. +* Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no). +* Validation: Indicates the validation level of the protein group. + + +Protein Report +-------------- + +* Main Accession: Main accession of the protein group. +* Description: Description of the protein designed by the main accession. +* Gene Name: The gene names of the Ensembl gene ID associated to the main accession. +* Chromosome: The chromosome of the Ensembl gene ID associated to the main accession. +* PI: Protein Inference status of the protein group. +* Secondary Accessions: Other accessions in the protein group (alphabetical order). +* Protein Group: The complete protein group (alphabetical order). +* #Peptides: Total number of peptides. +* #Validated Peptides: Number of validated peptides. +* #Unique: Total number of peptides unique to this protein group. +* #PSMs: Number of PSMs +* #Validated PSMs: Number of validated PSMs +* Coverage (%): Sequence coverage in percent of the protein designed by the main accession. +* Possible Coverage (%): Possible sequence coverage in percent of the protein designed by the main accession according to the search settings. +* MW (kDa): Molecular Weight. +* Spectrum Counting NSAF: Normalized Spectrum Abundance Factor (NSAF) +* Spectrum Counting emPAI: exponentially modified Protein Abundance Index (emPAI) +* Confident Modification Sites: Number of Confident Modification Sites List of the sites where a variable modification was confidently localized. +* Other Modification Sites: Number of other Modification Sites List of the non*confident sites where a variable modification was localized. +* Score: Score of the protein group. +* Confidence: Confidence in percent associated to the protein group. +* Decoy: Indicates whether the protein group is a decoy (1: yes, 0: no). +* Validation: Indicates the validation level of the protein group. + + +Peptide Report +-------------- + + +* Protein(s): Protein(s) to which this peptide can be attached. +* AAs Before: The amino-acids before the sequence. +* Sequence: Sequence of the peptide. +* AAs After: The amino-acids after the sequence. +* Modified Sequence: The peptide sequence annotated with variable modifications. +* Variable Modifications: The variable modifications. +* Localization Confidence: The confidence in PTMs localization. +* Fixed Modifications: The fixed modifications. +* #Validated PSMs: Number of validated PSMs. +* #PSMs: Number of PSMs. +* Score: Score of the peptide. +* Confidence: Confidence in percent associated to the peptide. +* Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no). +* Validation: Indicates the validation level of the protein group. + + +Hierachical Report +------------------ + +* Main Accession: Main accession of the protein group. +* Description: Description of the protein designed by the main accession. +* PI: Protein Inference status of the protein group. +* Secondary Accessions: Other accessions in the protein group (alphabetical order). +* Protein Group: The complete protein group (alphabetical order). +* #Peptides: Total number of peptides. +* #Validated Peptides: Number of validated peptides. +* #Unique: Total number of peptides unique to this protein group. +* #PSMs: Number of PSMs +* #Validated PSMs: Number of validated PSMs +* Coverage (%): Sequence coverage in percent of the protein designed by the main accession. +* Possible Coverage (%): Possible sequence coverage in percent of the protein designed by the main accession according to the search settings. +* MW (kDa): Molecular Weight. +* Spectrum Counting NSAF: Normalized Spectrum Abundance Factor (NSAF) +* Spectrum Counting emPAI: exponentially modified Protein Abundance Index (emPAI) +* Confident Modification Sites: # Confident Modification Sites List of the sites where a variable modification was confidently localized. +* Other Modification Sites: # Other Modification Sites List of the non-confident sites where a variable modification was localized. +* Score: Score of the protein group. +* Confidence: Confidence in percent associated to the protein group. +* Decoy: Indicates whether the protein group is a decoy (1: yes, 0: no). +* Validation: Indicates the validation level of the protein group. +* Protein(s): Protein(s) to which this peptide can be attached. +* AAs Before: The amino-acids before the sequence. +* Sequence: Sequence of the peptide. +* AAs After: The amino-acids after the sequence. +* Variable Modifications: The variable modifications. +* Localization Confidence: The confidence in PTMs localization. +* Fixed Modifications: The fixed modifications. +* #Validated PSMs: Number of validated PSMs. +* #PSMs: Number of PSMs. +* Score: Score of the peptide. +* Confidence: Confidence in percent associated to the peptide. +* Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no). +* Validation: Indicates the validation level of the protein group. +* Protein(s): Protein(s) to which the peptide can be attached. +* Sequence: Sequence of the peptide. +* Modified Sequence: The peptide sequence annotated with variable modifications. +* Variable Modifications: The variable modifications. +* D-score: D-score for variable PTM localization. +* probabilistic PTM score: The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization. +* Localization Confidence: The confidence in variable PTM localization. +* Fixed Modifications: The fixed modifications. +* Spectrum File: The spectrum file. +* Spectrum Title: The title of the spectrum. +* Spectrum Scan Number: The spectrum scan number. +* RT: Retention time +* m/z: Measured m/z +* Measured Charge: The charge as given in the spectrum file. +* Identification Charge: The charge as inferred by the search engine. +* Theoretical Mass: The theoretical mass of the peptide. +* Isotope Number: The isotope number targetted by the instrument. +* Precursor m/z Error: The precursor m/z matching error. +* Score: Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs). +* Confidence: Confidence in percent associated to the retained PSM. +* Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no). +* Validation: Indicates the validation level of the protein group. + + +------ + +**Citation** + +To cite the underlying tools (PeptideShaker and SearchGUI) please refer to the list of papers at http://compomics.github.io + +If you use this tool in Galaxy, please cite Chilton J, Ira Cooke, Bjoern Gruening et al. + </help> + <expand macro="citations" /> +</tool>