Mercurial > repos > galaxyp > meta_proteome_analyzer
diff meta_proteome_analyzer.xml @ 0:c0abf055f6f1 draft
Uploaded
| author | galaxyp |
|---|---|
| date | Fri, 03 Mar 2017 11:44:29 -0500 |
| parents | |
| children | b41e6d379c5f |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/meta_proteome_analyzer.xml Fri Mar 03 11:44:29 2017 -0500
@@ -0,0 +1,179 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<tool id="meta_proteome_analyzer" name="MetaProteomeAnalyzer" version="1.4.1">
+    <description>
+        functional and taxonomic characterization of proteins
+    </description>
+    <requirements>
+        <requirement type="package" version="1.4.1">mpa-portable</requirement>
+    </requirements>
+    <!-- The job state follows the exit status that the script below propagates from MPA. -->
+    <command detect_errors="exit_code">
+<![CDATA[
+        #import re
+        #set $temp_stderr = "mpa_stderr"
+        ## A multiple select is rendered as a comma-separated string; split it so that
+        ## each engine test below is an exact match on one selected value.
+        #set $engines = str($search_engines_options.engines).split(',')
+        #set $use_xtandem = 1 if 'X!Tandem' in $engines else 0
+        #set $use_comet = 1 if 'Comet' in $engines else 0
+        #set $use_msgf = 1 if 'MSGF' in $engines else 0
+
+        cwd=`pwd`;
+        mkdir -p output_dir;
+        ## copy mpa conf dir to working dir
+        jar_dir=`mpa-portable -get_jar_dir`;
+        cp -R "\$jar_dir/conf" .;
+
+        ## echo the search engines to run
+        echo "$search_engines_options.engines";
+        echo "DB: ${input_database.display_name} sequences: ${input_database.metadata.sequences}";
+
+        #for $mgf in $peak_lists:
+            ## Replace characters that are unsafe inside the single-quoted link name.
+            #set $input_name = re.sub('[^\w\-\.]', '_', $mgf.display_name.split('/')[-1].replace(".mgf", "")) + ".mgf"
+            ln -s -f '${mgf}' '${input_name}';
+            #set $encoded_id = $__app__.security.encode_id($mgf.id)
+            echo "Spectrums:${mgf.display_name}(API:${encoded_id}) ";
+        #end for
+        ##ln -s "${input_database}" input_database.fasta;
+        cp '${input_database}' input_database.fasta;
+
+        ######################
+        ##       MPA        ##
+        ######################
+        (mpa-portable de.mpa.cli.CmdLineInterface -Djava.awt.headless=true -Xmx2048m
+            -spectrum_files "\$cwd"
+            -database input_database.fasta
+            -missed_cleav $missed_cleavages
+            -prec_tol ${precursor_options.prec_tol}${precursor_options.prec_tol_units}
+            -frag_tol ${precursor_options.frag_tol}
+            -xtandem $use_xtandem
+            -comet $use_comet
+            -msgf $use_msgf
+            -output_folder output_dir
+            -threads "\${GALAXY_SLOTS:-12}"
+            2> $temp_stderr);
+        ## Save the status of MPA itself before the commands below overwrite it.
+        exit_code_for_galaxy=\$?;
+        find "\$cwd/output_dir" -name '*_metaproteins.csv' -exec mv {} metaproteins.csv \; ;
+        find "\$cwd/output_dir" -name '*_metaprotein_taxa.csv' -exec mv {} metaprotein_taxa.csv \; ;
+        find "\$cwd/output_dir" -name '*_peptides.csv' -exec mv {} peptides.csv \; ;
+        find "\$cwd/output_dir" -name '*_proteins.csv' -exec mv {} proteins.csv \; ;
+        find "\$cwd/output_dir" -name '*_psms.csv' -exec mv {} psms.csv \; ;
+        find "\$cwd/output_dir" -name '*_spectrum_ids.csv' -exec mv {} spectrum_ids.csv \; ;
+        cat $temp_stderr 2>&1;
+        (exit \$exit_code_for_galaxy)
+]]>
+    </command>
+    <inputs>
+        <param name="input_database" type="data" format="fasta" label="Protein Database"
+               help="Select FASTA database from history"/>
+        <param name="peak_lists" type="data" format="mgf" multiple="true" label="Input Peak Lists (mgf)"
+               help="Select appropriate MGF dataset(s) from history"/>
+        <param name="missed_cleavages" type="integer" value="2" min="0" label="Maximum Missed Cleavages"
+               help="Allow peptides to contain up to this many missed enzyme cleavage sites."/>
+        <section name="precursor_options" expanded="false" title="Precursor Options">
+            <param name="prec_tol_units" type="select" label="Precursor Ion Tolerance Units"
+                   help="Select based on instrument used, as different machines provide different quality of spectra. ppm is a standard for most precursor ions">
+                <option value="ppm">Parts per million (ppm)</option>
+                <option value="Da">Daltons</option>
+            </param>
+            <param name="prec_tol" type="float" value="10" min="0" label="Precursor Ion Tolerance"
+                   help="Provide error value for precursor ion, based on instrument used. 10 ppm recommended for Orbitrap instrument"/>
+            <param name="frag_tol" type="float" value="0.5" min="0" label="Fragment Tolerance (Daltons)"
+                   help="Provide error value for fragment ions, based on instrument used"/>
+        </section>
+        <!-- Search Engine Selection -->
+        <section name="search_engines_options" expanded="false" title="Search Engine Options">
+            <param name="engines" type="select" display="checkboxes" multiple="true" optional="false" label="DB-Search Engines"
+                   help="Select at least one database search engine to run.">
+                <option value="X!Tandem" selected="true">X!Tandem</option>
+                <option value="MSGF">MS-GF+</option>
+                <option value="Comet">Comet</option>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="output_proteins" from_work_dir="proteins.csv" label="${tool.name} on ${on_string}: proteins">
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="Protein_No,Protein_Accession,Protein_Description,Protein_Taxonomy,Sequence_Coverage,Peptide_Count,NSAF,emPAI,Spectral_Count,Isoelectric_Point,Molecular_Weight,Protein_Sequence,Peptides" />
+            </actions>
+        </data>
+        <data format="tabular" name="output_peptides" from_work_dir="peptides.csv" label="${tool.name} on ${on_string}: peptides">
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="Peptide_Num,Protein_Accessions,Peptide_Sequence,Protein_Count,Spectral_Count,Taxonomic_Group,Taxonomic_Rank,NCBI_Taxonomy_ID" />
+            </actions>
+        </data>
+        <data format="tabular" name="output_PSMs" from_work_dir="psms.csv" label="${tool.name} on ${on_string}: PSMs">
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="PSM_Num,Protein_Accessions,Peptide_Sequence,Spectrum_Title,Charge,Search_Engine,q-value,Score" />
+            </actions>
+        </data>
+        <data format="tabular" name="output_spectrum_ids" from_work_dir="spectrum_ids.csv" label="${tool.name} on ${on_string}: spectrum_ids">
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="Spectrum_Number,Spectrum_ID,Spectrum_Title,Peptides,Protein_Accessions" />
+            </actions>
+        </data>
+        <data format="tabular" name="output_metaproteins" from_work_dir="metaproteins.csv" label="${tool.name} on ${on_string}: metaproteins">
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="Meta-Protein_Num,Meta-Protein_Accession,Meta-Protein_Description,Meta-Protein_Taxonomy,Meta-Protein_UniRef100,Meta-Protein_UniRef90,Meta-Protein_UniRef50,Meta-Protein_KO,Meta-Protein_EC,Peptide_Count,Spectral_Count,Proteins,Peptides" />
+            </actions>
+        </data>
+        <data format="tabular" name="output_metaprotein_taxa" from_work_dir="metaprotein_taxa.csv" label="${tool.name} on ${on_string}: metaprotein_taxa">
+            <actions>
+                <action name="comment_lines" type="metadata" default="1" />
+                <action name="column_names" type="metadata" default="Unclassified,Superkingdom,Kingdom,Phylum,Class,Order,Family,Genus,Species,Subspecies,Num_Peptides,Spectral_Count" />
+            </actions>
+        </data>
+    </outputs>
+    <tests>
+        <!-- TODO: add a functional test once small FASTA/MGF fixtures are available. -->
+    </tests>
+    <help>
+<![CDATA[
+**What it does**
+
+MetaProteomeAnalyzer (MPA) searches MGF peak lists against a protein FASTA database with the
+selected search engines (X!Tandem, MS-GF+ and/or Comet) for the functional and taxonomic
+characterization of proteins.
+
+----
+
+**Inputs**
+
+* Protein Database: the FASTA dataset that the spectra are searched against.
+* Input Peak Lists (mgf): one or more MGF datasets; all of them are searched in the same run.
+* Maximum Missed Cleavages and Precursor Options: search settings that are passed to MPA unchanged.
+* Search Engine Options: the database search engines that MPA is asked to run.
+
+----
+
+**Outputs**
+
+Six tabular datasets are produced. The first line of each is declared as a comment line; the columns are:
+
+* proteins: Protein_No, Protein_Accession, Protein_Description, Protein_Taxonomy, Sequence_Coverage, Peptide_Count, NSAF, emPAI, Spectral_Count, Isoelectric_Point, Molecular_Weight, Protein_Sequence, Peptides
+* peptides: Peptide_Num, Protein_Accessions, Peptide_Sequence, Protein_Count, Spectral_Count, Taxonomic_Group, Taxonomic_Rank, NCBI_Taxonomy_ID
+* PSMs: PSM_Num, Protein_Accessions, Peptide_Sequence, Spectrum_Title, Charge, Search_Engine, q-value, Score
+* spectrum_ids: Spectrum_Number, Spectrum_ID, Spectrum_Title, Peptides, Protein_Accessions
+* metaproteins: Meta-Protein_Num, Meta-Protein_Accession, Meta-Protein_Description, Meta-Protein_Taxonomy, Meta-Protein_UniRef100, Meta-Protein_UniRef90, Meta-Protein_UniRef50, Meta-Protein_KO, Meta-Protein_EC, Peptide_Count, Spectral_Count, Proteins, Peptides
+* metaprotein_taxa: Unclassified, Superkingdom, Kingdom, Phylum, Class, Order, Family, Genus, Species, Subspecies, Num_Peptides, Spectral_Count
+
+----
+
+**Citation**
+
+If you use this tool, please cite the publication listed in the citations of this tool.
+
+If you use this tool in Galaxy, please cite Chilton J, Ira Cooke, Bjoern Gruening et al.
+]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1021/pr501246w</citation>
+    </citations>
+</tool>
