changeset 4:020836ac3b8e draft

Uploaded
author bgruening
date Sat, 03 May 2014 13:58:19 -0400
parents 7939608a991c
children 4b15d68afa6c
files peptide_shaker.xml
diffstat 1 files changed, 187 insertions(+), 26 deletions(-) [+]
line wrap: on
line diff
--- a/peptide_shaker.xml	Fri May 02 15:34:25 2014 -0400
+++ b/peptide_shaker.xml	Sat May 03 13:58:19 2014 -0400
@@ -1,4 +1,4 @@
-<tool id="peptide_shaker" name="Peptide Shaker" version="0.1.0">
+<tool id="peptide_shaker" name="Peptide Shaker" version="1.18.3.0">
     <description>
         Peform protein identification combining X! Tandem and OMSSA (using SearchGUI) and PeptideShaker pipeline.
     </description>
@@ -21,7 +21,7 @@
             #set $input_name = $mgf.display_name.replace(".mgf", "") + ".mgf"
             ln -s '${mgf}' '${input_name}';
         #end for
-        #ln -s "${input_database}" input_database.fasta;
+        ##ln -s "${input_database}" input_database.fasta;
         cp "${input_database}" input_database.fasta;
 
         ###########################################
@@ -32,7 +32,7 @@
             java -cp \$SEARCHGUI_JAR_PATH eu.isas.searchgui.cmd.FastaCLI -in input_database.fasta -decoy;
             rm input_database.fasta;
             cp input_database_concatenated_target_decoy.fasta input_database.fasta;
-            #ln -sf input_database_concatenated_target_decoy.fasta input_database.fasta;
+            ##ln -sf input_database_concatenated_target_decoy.fasta input_database.fasta;
         #end if
 
         #####################################################
@@ -95,6 +95,12 @@
             -mgf_splitting "${mgf_splitting}"
             -mgf_spectrum_count "${mgf_spectrum_count}"
 
+            ## Turn off the protein tree generation, as it can produce errors if the search finishes before the tree is created;
+            ## the tree is generated afterwards in PeptideShaker
+            -protein_tree 0
+
+            -makeblastdb_folder \$BLAST_ROOT_DIR
+
             #if $advanced.advanced_type_selector == "advanced":
 
                 #if $advanced.xtandem.xtandem_selector == "yes"
@@ -105,7 +111,6 @@
 
                 #if $advanced.omssa.omssa_selector == "yes"
                     -omssa 1
-                    -makeblastdb_folder \$BLAST_ROOT_DIR
                 #else
                     -omssa 0
                 #end if
@@ -204,24 +209,42 @@
             -temp_folder `pwd`
             -in \$cwd/peptide_shaker_output.cps
             -out_reports \$cwd/output_reports
-            -reports str($outputs).split(',').remove('cps')
+            #if 'cps' in str($outputs).split(','):
+                #set $cleaned_list = str($outputs).split(',')
+                #silent $cleaned_list.remove('cps')
+                -reports #echo ','.join($cleaned_list)#
+            #else:
+                -reports #echo str($outputs)#
+            #end if
         ;
 
+        rm /home/bag/test/foo/* -rf ;
+        cp -R \$cwd /home/bag/test/foo/ ;
+
         #if '0' in str($outputs).split(','):
-            find . -name '\$cwd/output_reports/*Certificate*' -exec bash -c 'mv "$0" "certificate.txt"' {} \;
+            find \$cwd/output_reports -name '*Certificate*' -exec bash -c 'mv "$0" "certificate.txt"' {} \;
+            ;
+        #end if
+        #if '2' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*PSM*' -exec bash -c 'mv "$0" "psm.txt"' {} \;
+            ;
+        #end if
+        #if '3' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*Peptide*' -exec bash -c 'mv "$0" "peptides.txt"' {} \;
+            ;
+        #end if
+        #if '4' in str($outputs).split(','):
+            find \$cwd/output_reports -name '*Protein*' -exec bash -c 'mv "$0" "proteins.txt"' {} \;
+            ;
         #end if
         #if '1' in str($outputs).split(','):
-            find . -name '\$cwd/output_reports/*PSM*' -exec bash -c 'mv "$0" "psm.txt"' {} \;
-        #end if
-        #if '2' in str($outputs).split(','):
-            find . -name '\$cwd/output_reports/*Peptide*' -exec bash -c 'mv "$0" "peptides.txt"' {} \;
+            find \$cwd/output_reports -name '*Hierarchical*' -exec bash -c 'mv "$0" "hierarchical.txt"' {} \;
+            ;
         #end if
-        #if '3' in str($outputs).split(','):
-            find . -name '\$cwd/output_reports/*Protein*' -exec bash -c 'mv "$0" "proteins.txt"' {} \;
-        #end if
-        #if '4' in str($outputs).split(','):
-            find . -name '\$cwd/output_reports/*Top*' -exec bash -c 'mv "$0" ".txt"' {} \;
-        #end if
+
+        ##cd ..;
+        ## the following line is needed. Otherwise, Galaxy will strip away the ';'
+        echo 'Finished!';
     </command>
     <inputs>
         <param format="fasta" name="input_database" type="data" label="Protein Database"
@@ -636,11 +659,11 @@
         </conditional>
 
         <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output options">
-            <option value="2" selected="True">Peptide Report</option>
-            <option value="3" selected="True">Protein Report</option>
-            <option value="1">PSM Report</option>
+            <option value="3" selected="True">Peptide Report</option>
+            <option value="4" selected="True">Protein Report</option>
+            <option value="2">PSM Report</option>
             <option value="0">Certificate of Analysis</option>
-            <option value="4">Top Down Report</option>
+            <option value="1">Hierarchical Report</option>
             <option value="cps">CPS file</option>
             <validator type="no_options" message="Please select at least one output file." />
         </param>
@@ -653,29 +676,167 @@
             <filter>'0' in outputs</filter>
         </data>
         <data format="text" name="output_psm" from_work_dir="psm.txt" label="${tool.name} on ${on_string}: PSM Report">
-            <filter>'1' in outputs</filter>
-        </data>
-        <data format="tabular" name="output_peptides" from_work_dir="peptides.txt" label="${tool.name} on ${on_string}: Peptide Report">
             <filter>'2' in outputs</filter>
         </data>
-        <data format="tabular" name="output_proteins" from_work_dir="proteins.txt" label="${tool.name} on ${on_string}: Protein Report">
+        <data format="tabular" name="output_peptides" from_work_dir="peptides.txt" label="${tool.name} on ${on_string}: Peptide Report">
             <filter>'3' in outputs</filter>
         </data>
-        <data format="tabular" name="output_top_down" from_work_dir="top_down.txt" label="${tool.name} on ${on_string}: Top Down Report">
+        <data format="tabular" name="output_proteins" from_work_dir="proteins.txt" label="${tool.name} on ${on_string}: Protein Report">
             <filter>'4' in outputs</filter>
         </data>
+        <data format="tabular" name="output_hierarchical" from_work_dir="hierarchical.txt" label="${tool.name} on ${on_string}: Hierarchical Report">
+            <filter>'1' in outputs</filter>
+        </data>
     </outputs>
     <help>
 **What it does**
 
 Runs multiple search engines (X! Tandem and OMSSA) on any number of MGF peak lists using the SearchGUI application and combines the results.
 
+https://code.google.com/p/peptide-shaker/
+https://code.google.com/p/searchgui/
+
+**Reports**
+
+*PSM Report*
+
+Protein(s)	Protein(s) to which the peptide can be attached.
+Sequence	Sequence of the peptide.
+Variable Modifications	The variable modifications.
+D-score	D-score for variable PTM localization.
+probabilistic PTM score	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
+Localization Confidence	The confidence in variable PTM localization.
+Fixed Modifications	The fixed modifications.
+Spectrum File	The spectrum file.
+Spectrum Title	The title of the spectrum.
+Spectrum Scan Number	The spectrum scan number.
+RT	Retention time
+m/z	Measured m/z
+Measured Charge	The charge as given in the spectrum file.
+Identification Charge	The charge as inferred by the search engine.
+Theoretical Mass	The theoretical mass of the peptide.
+Isotope Number	The isotope number targeted by the instrument.
+Precursor m/z Error	The precursor m/z matching error.
+Score	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
+Confidence	Confidence in percent associated to the retained PSM.
+Decoy	Indicates whether the peptide is a decoy (1: yes, 0: no).
+Validation	Indicates the validation level of the PSM.
+
+
+*Protein Report*
+
+Main Accession	Main accession of the protein group.
+Description	Description of the protein designated by the main accession.
+Gene Name	The gene names of the Ensembl gene ID associated to the main accession.
+Chromosome	The chromosome of the Ensembl gene ID associated to the main accession.
+PI	Protein Inference status of the protein group.
+Secondary Accessions	Other accessions in the protein group (alphabetical order).
+Protein Group	The complete protein group (alphabetical order).
+#Peptides	Total number of peptides.
+#Validated Peptides	Number of validated peptides.
+#Unique	Total number of peptides unique to this protein group.
+#PSMs	Number of PSMs
+#Validated PSMs	Number of validated PSMs
+Coverage (%)	Sequence coverage in percent of the protein designated by the main accession.
+Possible Coverage (%)	Possible sequence coverage in percent of the protein designated by the main accession according to the search settings.
+MW (kDa)	Molecular Weight.
+Spectrum Counting NSAF 	Normalized Spectrum Abundance Factor (NSAF)
+Spectrum Counting emPAI	exponentially modified Protein Abundance Index (emPAI)
+Confident Modification Sites, # Confident Modification Sites	List of the sites where a variable modification was confidently localized.
+Other Modification Sites, # Other Modification Sites	List of the non-confident sites where a variable modification was localized.
+Score	Score of the protein group.
+Confidence	Confidence in percent associated to the protein group.
+Decoy	Indicates whether the protein group is a decoy (1: yes, 0: no).
+Validation	Indicates the validation level of the protein group.
+
+
+*Peptide Report*
+
+
+Protein(s)	Protein(s) to which this peptide can be attached.
+AAs Before	The amino-acids before the sequence.
+Sequence	Sequence of the peptide.
+AAs After	The amino-acids after the sequence.
+Modified Sequence	The peptide sequence annotated with variable modifications.
+Variable Modifications	The variable modifications.
+Localization Confidence	The confidence in PTMs localization.
+Fixed Modifications	The fixed modifications.
+#Validated PSMs	Number of validated PSMs.
+#PSMs	Number of PSMs.
+Score	Score of the peptide.
+Confidence	Confidence in percent associated to the peptide.
+Decoy	Indicates whether the peptide is a decoy (1: yes, 0: no).
+Validation	Indicates the validation level of the peptide.
+
+
+*Hierarchical Report*
+
+Main Accession	Main accession of the protein group.
+Description	Description of the protein designated by the main accession.
+PI	Protein Inference status of the protein group.
+Secondary Accessions	Other accessions in the protein group (alphabetical order).
+Protein Group	The complete protein group (alphabetical order).
+#Peptides	Total number of peptides.
+#Validated Peptides	Number of validated peptides.
+#Unique	Total number of peptides unique to this protein group.
+#PSMs	Number of PSMs
+#Validated PSMs	Number of validated PSMs
+Coverage (%)	Sequence coverage in percent of the protein designated by the main accession.
+Possible Coverage (%)	Possible sequence coverage in percent of the protein designated by the main accession according to the search settings.
+MW (kDa)	Molecular Weight.
+Spectrum Counting NSAF 	Normalized Spectrum Abundance Factor (NSAF)
+Spectrum Counting emPAI	exponentially modified Protein Abundance Index (emPAI)
+Confident Modification Sites, # Confident Modification Sites	List of the sites where a variable modification was confidently localized.
+Other Modification Sites, # Other Modification Sites	List of the non-confident sites where a variable modification was localized.
+Score	Score of the protein group.
+Confidence	Confidence in percent associated to the protein group.
+Decoy	Indicates whether the protein group is a decoy (1: yes, 0: no).
+Validation	Indicates the validation level of the protein group.
+Protein(s)	Protein(s) to which this peptide can be attached.
+AAs Before	The amino-acids before the sequence.
+Sequence	Sequence of the peptide.
+AAs After	The amino-acids after the sequence.
+Variable Modifications	The variable modifications.
+Localization Confidence	The confidence in PTMs localization.
+Fixed Modifications	The fixed modifications.
+#Validated PSMs	Number of validated PSMs.
+#PSMs	Number of PSMs.
+Score	Score of the peptide.
+Confidence	Confidence in percent associated to the peptide.
+Decoy	Indicates whether the peptide is a decoy (1: yes, 0: no).
+Validation	Indicates the validation level of the protein group.
+Protein(s)	Protein(s) to which the peptide can be attached.
+Sequence	Sequence of the peptide.
+Modified Sequence	The peptide sequence annotated with variable modifications.
+Variable Modifications	The variable modifications.
+D-score	D-score for variable PTM localization.
+probabilistic PTM score	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
+Localization Confidence	The confidence in variable PTM localization.
+Fixed Modifications	The fixed modifications.
+Spectrum File	The spectrum file.
+Spectrum Title	The title of the spectrum.
+Spectrum Scan Number	The spectrum scan number.
+RT	Retention time
+m/z	Measured m/z
+Measured Charge	The charge as given in the spectrum file.
+Identification Charge	The charge as inferred by the search engine.
+Theoretical Mass	The theoretical mass of the peptide.
+Isotope Number	The isotope number targeted by the instrument.
+Precursor m/z Error	The precursor m/z matching error.
+Score	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
+Confidence	Confidence in percent associated to the retained PSM.
+Decoy	Indicates whether the peptide is a decoy (1: yes, 0: no).
+Validation	Indicates the validation level of the protein group.
+
+
+
+
 ------
 
 **Citation**
 
 For the underlying tool, please cite `TODO`
 
-If you use this tool in Galaxy, please cite Chilton J, et al. https://bitbucket.org/galaxyp/peptideshaker
+If you use this tool in Galaxy, please cite Chilton J, Ira Cooke, Bjoern Gruening et al. https://bitbucket.org/galaxyp/peptideshaker
     </help>
 </tool>