# HG changeset patch # User bgruening # Date 1399139899 14400 # Node ID 020836ac3b8e3fe54e4325b26e7f2457e0c29d8e # Parent 7939608a991cb27cfbd8d4872047f5927472fb69 Uploaded diff -r 7939608a991c -r 020836ac3b8e peptide_shaker.xml --- a/peptide_shaker.xml Fri May 02 15:34:25 2014 -0400 +++ b/peptide_shaker.xml Sat May 03 13:58:19 2014 -0400 @@ -1,4 +1,4 @@ - + Peform protein identification combining X! Tandem and OMSSA (using SearchGUI) and PeptideShaker pipeline. @@ -21,7 +21,7 @@ #set $input_name = $mgf.display_name.replace(".mgf", "") + ".mgf" ln -s '${mgf}' '${input_name}'; #end for - #ln -s "${input_database}" input_database.fasta; + ##ln -s "${input_database}" input_database.fasta; cp "${input_database}" input_database.fasta; ########################################### @@ -32,7 +32,7 @@ java -cp \$SEARCHGUI_JAR_PATH eu.isas.searchgui.cmd.FastaCLI -in input_database.fasta -decoy; rm input_database.fasta; cp input_database_concatenated_target_decoy.fasta input_database.fasta; - #ln -sf input_database_concatenated_target_decoy.fasta input_database.fasta; + ##ln -sf input_database_concatenated_target_decoy.fasta input_database.fasta; #end if ##################################################### @@ -95,6 +95,12 @@ -mgf_splitting "${mgf_splitting}" -mgf_spectrum_count "${mgf_spectrum_count}" + ## Turn off the protein tree generation as it can produce errors if the search is finished before the tree is created + ## the tree is generated afterwards in PeptideShaker + -protein_tree 0 + + -makeblastdb_folder \$BLAST_ROOT_DIR + #if $advanced.advanced_type_selector == "advanced": #if $advanced.xtandem.xtandem_selector == "yes" @@ -105,7 +111,6 @@ #if $advanced.omssa.omssa_selector == "yes" -omssa 1 - -makeblastdb_folder \$BLAST_ROOT_DIR #else -omssa 0 #end if @@ -204,24 +209,42 @@ -temp_folder `pwd` -in \$cwd/peptide_shaker_output.cps -out_reports \$cwd/output_reports - -reports str($outputs).split(',').remove('cps') + #if 'cps' in str($outputs).split(','): + #set
$cleaned_list = str($outputs).split(',') + #silent $cleaned_list.remove('cps') + -reports #echo ','.join($cleaned_list)# + #else: + -reports #echo str($outputs)# + #end if ; + ## DEBUG leftover, disabled (hard-coded developer-local path, must not run on a Galaxy server): rm /home/bag/test/foo/* -rf ; + ## DEBUG leftover, disabled (hard-coded developer-local path, must not run on a Galaxy server): cp -R \$cwd /home/bag/test/foo/ ; + + #if '0' in str($outputs).split(','): - find . -name '\$cwd/output_reports/*Certificate*' -exec bash -c 'mv "$0" "certificate.txt"' {} \; + find \$cwd/output_reports -name '*Certificate*' -exec bash -c 'mv "$0" "certificate.txt"' {} \; + ; + #end if + #if '2' in str($outputs).split(','): + find \$cwd/output_reports -name '*PSM*' -exec bash -c 'mv "$0" "psm.txt"' {} \; + ; + #end if + #if '3' in str($outputs).split(','): + find \$cwd/output_reports -name '*Peptide*' -exec bash -c 'mv "$0" "peptides.txt"' {} \; + ; + #end if + #if '4' in str($outputs).split(','): + find \$cwd/output_reports -name '*Protein*' -exec bash -c 'mv "$0" "proteins.txt"' {} \; + ; #end if #if '1' in str($outputs).split(','): - find . -name '\$cwd/output_reports/*PSM*' -exec bash -c 'mv "$0" "psm.txt"' {} \; - #end if - #if '2' in str($outputs).split(','): - find . -name '\$cwd/output_reports/*Peptide*' -exec bash -c 'mv "$0" "peptides.txt"' {} \; + find \$cwd/output_reports -name '*Hierarchical*' -exec bash -c 'mv "$0" "hierarchical.txt"' {} \; + ; #end if - #if '3' in str($outputs).split(','): - find . -name '\$cwd/output_reports/*Protein*' -exec bash -c 'mv "$0" "proteins.txt"' {} \; - #end if - #if '4' in str($outputs).split(','): - find . -name '\$cwd/output_reports/*Top*' -exec bash -c 'mv "$0" ".txt"' {} \; - #end if + + ##cd ..; + ## the following line is needed. Otherwise, Galaxy will strip away the ';' + echo 'Finished!'; - - - + + + - + @@ -653,29 +676,167 @@ '0' in outputs - '1' in outputs - - '2' in outputs - + '3' in outputs - + '4' in outputs + + '1' in outputs + **What it does** Runs multiple search engines (X! Tandem and OMSSA) on any number of MGF peak lists using the SearchGUI application and combines the results.
+https://code.google.com/p/peptide-shaker/ +https://code.google.com/p/searchgui/ + +**Reports** + +*PSM Report* + +Protein(s) Protein(s) to which the peptide can be attached. +Sequence Sequence of the peptide. +Variable Modifications The variable modifications. +D-score D-score for variable PTM localization. +probabilistic PTM score The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization. +Localization Confidence The confidence in variable PTM localization. +Fixed Modifications The fixed modifications. +Spectrum File The spectrum file. +Spectrum Title The title of the spectrum. +Spectrum Scan Number The spectrum scan number. +RT Retention time +m/z Measured m/z +Measured Charge The charge as given in the spectrum file. +Identification Charge The charge as inferred by the search engine. +Theoretical Mass The theoretical mass of the peptide. +Isotope Number The isotope number targeted by the instrument. +Precursor m/z Error The precursor m/z matching error. +Score Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs). +Confidence Confidence in percent associated to the retained PSM. +Decoy Indicates whether the peptide is a decoy (1: yes, 0: no). +Validation Indicates the validation level of the protein group. + + +*Protein Report* + +Main Accession Main accession of the protein group. +Description Description of the protein designed by the main accession. +Gene Name The gene names of the Ensembl gene ID associated to the main accession. +Chromosome The chromosome of the Ensembl gene ID associated to the main accession. +PI Protein Inference status of the protein group. +Secondary Accessions Other accessions in the protein group (alphabetical order). +Protein Group The complete protein group (alphabetical order). +#Peptides Total number of peptides. +#Validated Peptides Number of validated peptides. +#Unique Total number of peptides unique to this protein group.
+#PSMs Number of PSMs +#Validated PSMs Number of validated PSMs +Coverage (%) Sequence coverage in percent of the protein designed by the main accession. +Possible Coverage (%) Possible sequence coverage in percent of the protein designed by the main accession according to the search settings. +MW (kDa) Molecular Weight. +Spectrum Counting NSAF Normalized Spectrum Abundance Factor (NSAF) +Spectrum Counting emPAI exponentially modified Protein Abundance Index (emPAI) +Confident Modification Sites, # Confident Modification Sites List of the sites where a variable modification was confidently localized. +Other Modification Sites, # Other Modification Sites List of the non-confident sites where a variable modification was localized. +Score Score of the protein group. +Confidence Confidence in percent associated to the protein group. +Decoy Indicates whether the protein group is a decoy (1: yes, 0: no). +Validation Indicates the validation level of the protein group. + + +*Peptide Report* + + +Protein(s) Protein(s) to which this peptide can be attached. +AAs Before The amino-acids before the sequence. +Sequence Sequence of the peptide. +AAs After The amino-acids after the sequence. +Modified Sequence The peptide sequence annotated with variable modifications. +Variable Modifications The variable modifications. +Localization Confidence The confidence in PTMs localization. +Fixed Modifications The fixed modifications. +#Validated PSMs Number of validated PSMs. +#PSMs Number of PSMs. +Score Score of the peptide. +Confidence Confidence in percent associated to the peptide. +Decoy Indicates whether the peptide is a decoy (1: yes, 0: no). +Validation Indicates the validation level of the protein group. + + +*Hierarchical Report* + +Main Accession Main accession of the protein group. +Description Description of the protein designed by the main accession. +PI Protein Inference status of the protein group.
+Secondary Accessions Other accessions in the protein group (alphabetical order). +Protein Group The complete protein group (alphabetical order). +#Peptides Total number of peptides. +#Validated Peptides Number of validated peptides. +#Unique Total number of peptides unique to this protein group. +#PSMs Number of PSMs +#Validated PSMs Number of validated PSMs +Coverage (%) Sequence coverage in percent of the protein designed by the main accession. +Possible Coverage (%) Possible sequence coverage in percent of the protein designed by the main accession according to the search settings. +MW (kDa) Molecular Weight. +Spectrum Counting NSAF Normalized Spectrum Abundance Factor (NSAF) +Spectrum Counting emPAI exponentially modified Protein Abundance Index (emPAI) +Confident Modification Sites, # Confident Modification Sites List of the sites where a variable modification was confidently localized. +Other Modification Sites, # Other Modification Sites List of the non-confident sites where a variable modification was localized. +Score Score of the protein group. +Confidence Confidence in percent associated to the protein group. +Decoy Indicates whether the protein group is a decoy (1: yes, 0: no). +Validation Indicates the validation level of the protein group. +Protein(s) Protein(s) to which this peptide can be attached. +AAs Before The amino-acids before the sequence. +Sequence Sequence of the peptide. +AAs After The amino-acids after the sequence. +Variable Modifications The variable modifications. +Localization Confidence The confidence in PTMs localization. +Fixed Modifications The fixed modifications. +#Validated PSMs Number of validated PSMs. +#PSMs Number of PSMs. +Score Score of the peptide. +Confidence Confidence in percent associated to the peptide. +Decoy Indicates whether the peptide is a decoy (1: yes, 0: no). +Validation Indicates the validation level of the protein group. +Protein(s) Protein(s) to which the peptide can be attached. 
+Sequence Sequence of the peptide. +Modified Sequence The peptide sequence annotated with variable modifications. +Variable Modifications The variable modifications. +D-score D-score for variable PTM localization. +probabilistic PTM score The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization. +Localization Confidence The confidence in variable PTM localization. +Fixed Modifications The fixed modifications. +Spectrum File The spectrum file. +Spectrum Title The title of the spectrum. +Spectrum Scan Number The spectrum scan number. +RT Retention time +m/z Measured m/z +Measured Charge The charge as given in the spectrum file. +Identification Charge The charge as inferred by the search engine. +Theoretical Mass The theoretical mass of the peptide. +Isotope Number The isotope number targeted by the instrument. +Precursor m/z Error The precursor m/z matching error. +Score Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs). +Confidence Confidence in percent associated to the retained PSM. +Decoy Indicates whether the peptide is a decoy (1: yes, 0: no). +Validation Indicates the validation level of the protein group. + + + + ------ **Citation** For the underlying tool, please cite `TODO` -If you use this tool in Galaxy, please cite Chilton J, et al. https://bitbucket.org/galaxyp/peptideshaker +If you use this tool in Galaxy, please cite Chilton J, Ira Cooke, Bjoern Gruening et al. https://bitbucket.org/galaxyp/peptideshaker