Mercurial > repos > galaxyp > meta_proteome_analyzer

diff meta_proteome_analyzer.xml @ 0:c0abf055f6f1 draft
Uploaded
author: galaxyp
date: Fri, 03 Mar 2017 11:44:29 -0500
children: b41e6d379c5f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/meta_proteome_analyzer.xml	Fri Mar 03 11:44:29 2017 -0500
@@ -0,0 +1,287 @@
+<tool id="meta_proteome_analyzer" name="MetaProteomeAnalyzer" version="1.4.1">
+    <!-- One-line summary of the tool. -->
+    <description>
+        functional and taxonomic characterization of proteins
+    </description>
+    <!-- Declares the mpa-portable package (version 1.4.1) as a dependency; the
+         command section below invokes an executable of the same name. -->
+    <requirements>
+        <requirement type="package" version="1.4.1">mpa-portable</requirement>
+    </requirements>
+    <command>
+<![CDATA[
+        ## Cheetah template that renders the shell command for one job:
+        ##   1. stage the MPA configuration, the MGF peak lists and the FASTA database
+        ##      in the job working directory,
+        ##   2. run the MPA command line interface with the selected search engines,
+        ##   3. rename the CSV exports to the fixed file names referenced by the
+        ##      from_work_dir attributes in the outputs section,
+        ##   4. replay the captured stderr and exit with the status of the MPA run.
+        ## Shell variables are written with an escaped dollar sign so that Cheetah
+        ## leaves them for the shell to expand.
+        #set $temp_stderr = "mpa_stderr"
+
+        cwd=`pwd`;
+        mkdir -p output_dir;
+        ## copy mpa conf dir to working dir
+        ## (jar_dir is a shell variable, so it must be dereferenced with an escaped
+        ## dollar sign; a bare "jar_dir/conf" would be a literal relative path)
+        jar_dir=`mpa-portable -get_jar_dir`;
+        cp -R "\$jar_dir/conf" .;
+
+        ## echo the search engines to run
+        echo "$search_engines_options.engines";
+        echo "DB: ${input_database.display_name} sequences: ${input_database.metadata.sequences}";
+
+        ## Link every peak list into the working directory under a name that ends in
+        ## exactly one ".mgf" extension; the working directory is passed to MPA as
+        ## the spectrum file location below.
+        #for $mgf in $peak_lists:
+            #set $input_name = $mgf.display_name.split('/')[-1].replace(".mgf", "") + ".mgf"
+            ln -s -f '${mgf}' '${input_name}';
+            #set $encoded_id = $__app__.security.encode_id($mgf.id)
+            echo "Spectrums:${mgf.display_name}(API:${encoded_id}) ";
+        #end for
+        ##ln -s "${input_database}" input_database.fasta;
+        cp "${input_database}" input_database.fasta;
+
+        ######################
+        ## MPA ##
+        ######################
+        ## Each engine switch is rendered as 1 or 0 with a one-line Cheetah #if, which
+        ## requires the "then" keyword. The multi-select value is converted to its
+        ## comma-separated string form and split, so the membership test does not
+        ## depend on the parameter wrapper supporting the "in" operator.
+        (mpa-portable de.mpa.cli.CmdLineInterface -Djava.awt.headless=true -Xmx2048m
+            -spectrum_files \$cwd
+            -database input_database.fasta
+            -missed_cleav $missed_cleavages
+            -prec_tol ${precursor_options.prec_tol}${precursor_options.prec_tol_units}
+            -frag_tol ${precursor_options.frag_tol}
+            -xtandem #if 'X!Tandem' in str($search_engines_options.engines).split(',') then 1 else 0#
+            -comet #if 'Comet' in str($search_engines_options.engines).split(',') then 1 else 0#
+            -msgf #if 'MSGF' in str($search_engines_options.engines).split(',') then 1 else 0#
+            -output_folder output_dir
+            -threads "\${GALAXY_SLOTS:-12}"
+         2> $temp_stderr);
+        ## Record the MPA exit status immediately; reading it after the find commands
+        ## below would report the status of the last find instead.
+        exit_code_for_galaxy=\$?;
+        ## Move each export to its fixed output name. A find without matches is a
+        ## no-op, so these are safe to run even when MPA produced no files.
+        find \$cwd/output_dir -name '*_metaproteins.csv' -exec bash -c 'mv "$0" "metaproteins.csv"' {} \; ;
+        find \$cwd/output_dir -name '*_metaprotein_taxa.csv' -exec bash -c 'mv "$0" "metaprotein_taxa.csv"' {} \; ;
+        find \$cwd/output_dir -name '*_peptides.csv' -exec bash -c 'mv "$0" "peptides.csv"' {} \; ;
+        find \$cwd/output_dir -name '*_proteins.csv' -exec bash -c 'mv "$0" "proteins.csv"' {} \; ;
+        find \$cwd/output_dir -name '*_psms.csv' -exec bash -c 'mv "$0" "psms.csv"' {} \; ;
+        find \$cwd/output_dir -name '*_spectrum_ids.csv' -exec bash -c 'mv "$0" "spectrum_ids.csv"' {} \; ;
+        cat $temp_stderr 2>&1;
+        (exit \$exit_code_for_galaxy)
+         
+]]>
+    </command>
+    <inputs>
+        <!-- Datasets: one FASTA protein database and one or more MGF peak lists. -->
+        <param format="fasta" name="input_database" type="data" label="Protein Database"
+            help="Select FASTA database from history"/>
+        <param name="peak_lists" format="mgf" type="data" multiple="true" label="Input Peak Lists (mgf)"
+            help="Select appropriate MGF dataset(s) from history" />
+        <param name="missed_cleavages" type="integer" value="2" label="Maximum Missed Cleavages"
+            help="Allow peptides to contain up to this many missed enzyme cleavage sites."/>
+        <!-- Mass tolerances; the command template concatenates prec_tol and
+             prec_tol_units into a single precursor tolerance argument. -->
+        <section name="precursor_options" expanded="false" title="Precursor Options">
+            <param name="prec_tol_units" type="select" label="Precursor Ion Tolerance Units"
+                help="Select based on instrument used, as different machines provide different quality of spectra. ppm is a standard for most precursor ions">
+                <option value="ppm">Parts per million (ppm)</option>
+                <option value="Da">Daltons</option>
+            </param>
+            <param name="prec_tol" type="float" value="10" label="Precursor Ion Tolerance"
+                help="Provide error value for precursor ion, based on instrument used. 10 ppm recommended for Orbitrap instrument"/>
+            <param name="frag_tol" type="float" value="0.5" label="Fragment Tolerance (Daltons)"
+                help="Provide error value for fragment ions, based on instrument used"/>
+        </section>
+        <!-- Search Engine Selection -->
+        <!-- The option values below are matched literally by the command template
+             to switch each engine on or off. -->
+        <!-- NOTE(review): the help text mentions Tide, which is not among the
+             options offered here; it looks carried over from another wrapper.
+             Confirm and reword. -->
+        <section name="search_engines_options" expanded="false" title="Search Engine Options">
+            <param name="engines" type="select" display="checkboxes" multiple="True" label="DB-Search Engines">
+                <help>Comet and Tide shouldn't both be selected since they use a similar algorithm.</help>
+                <option value="X!Tandem" selected="True">X!Tandem</option>
+                <option value="MSGF">MS-GF+</option>
+                <option value="Comet">Comet</option>
+            </param>
+        </section>
+
+    </inputs>
+    <outputs>
+        <!-- Six tabular datasets, each picked up from a fixed file name in the job
+             working directory (the command section renames the MPA exports to these
+             names). Every dataset gets the same two metadata defaults: one comment
+             line and an explicit list of column names. -->
+
+        <!-- Proteins -->
+        <data name="output_proteins" format="tabular" from_work_dir="proteins.csv"
+            label="${tool.name} on ${on_string}: proteins">
+            <actions>
+                <action type="metadata" name="comment_lines" default="1" />
+                <action type="metadata" name="column_names"
+                    default="Protein_No,Protein_Accession,Protein_Description,Protein_Taxonomy,Sequence_Coverage,Peptide_Count,NSAF,emPAI,Spectral_Count,Isoelectric_Point,Molecular_Weight,Protein_Sequence,Peptides" />
+            </actions>
+        </data>
+
+        <!-- Peptides -->
+        <data name="output_peptides" format="tabular" from_work_dir="peptides.csv"
+            label="${tool.name} on ${on_string}: peptides">
+            <actions>
+                <action type="metadata" name="comment_lines" default="1" />
+                <action type="metadata" name="column_names"
+                    default="Peptide_Num,Protein_Accessions,Peptide_Sequence,Protein_Count,Spectral_Count,Taxonomic_Group,Taxonomic_Rank,NCBI_Taxonomy_ID" />
+            </actions>
+        </data>
+
+        <!-- Peptide-spectrum matches -->
+        <data name="output_PSMs" format="tabular" from_work_dir="psms.csv"
+            label="${tool.name} on ${on_string}: PSMs">
+            <actions>
+                <action type="metadata" name="comment_lines" default="1" />
+                <action type="metadata" name="column_names"
+                    default="PSM_Num,Protein_Accessions,Peptide_Sequence,Spectrum_Title,Charge,Search_Engine,q-value,Score" />
+            </actions>
+        </data>
+
+        <!-- Spectrum identifiers -->
+        <data name="output_spectrum_ids" format="tabular" from_work_dir="spectrum_ids.csv"
+            label="${tool.name} on ${on_string}: spectrum_ids">
+            <actions>
+                <action type="metadata" name="comment_lines" default="1" />
+                <action type="metadata" name="column_names"
+                    default="Spectrum_Number,Spectrum_ID,Spectrum_Title,Peptides,Protein_Accessions" />
+            </actions>
+        </data>
+
+        <!-- Meta-proteins -->
+        <data name="output_metaproteins" format="tabular" from_work_dir="metaproteins.csv"
+            label="${tool.name} on ${on_string}: metaproteins">
+            <actions>
+                <action type="metadata" name="comment_lines" default="1" />
+                <action type="metadata" name="column_names"
+                    default="Meta-Protein_Num,Meta-Protein_Accession,Meta-Protein_Description,Meta-Protein_Taxonomy,Meta-Protein_UniRef100,Meta-Protein_UniRef90,Meta-Protein_UniRef50,Meta-Protein_KO,Meta-Protein_EC,Peptide_Count,Spectral_Count,Proteins,Peptides" />
+            </actions>
+        </data>
+
+        <!-- Meta-protein taxonomy -->
+        <data name="output_metaprotein_taxa" format="tabular" from_work_dir="metaprotein_taxa.csv"
+            label="${tool.name} on ${on_string}: metaprotein_taxa">
+            <actions>
+                <action type="metadata" name="comment_lines" default="1" />
+                <action type="metadata" name="column_names"
+                    default="Unclassified,Superkingdom,Kingdom,Phylum,Class,Order,Family,Genus,Species,Subspecies,Num_Peptides,Spectral_Count" />
+            </actions>
+        </data>
+    </outputs>
+    <tests>
+        <!-- No tests are defined for this tool. -->
+    </tests>
+    <help>
+**What it does**
+
+Runs the selected database search engines (X! Tandem, Comet and MS-GF+) on any number of MGF peak lists using the MetaProteomeAnalyzer (MPA) application and combines the results.
+
+http://compomics.github.io/projects/peptide-shaker.html
+http://compomics.github.io/projects/searchgui.html
+
+----
+
+Reports
+=======
+
+
+PSM Report
+----------
+
+* Protein(s):                Protein(s) to which the peptide can be attached
+* Sequence:                  Sequence of the peptide
+* Variable Modifications:   The variable modifications
+* D-score:	                D-score for variable PTM localization
+* probabilistic PTM score:	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
+* Localization Confidence:	The confidence in variable PTM localization.
+* Fixed Modifications:	    The fixed modifications.
+* Spectrum File:	The spectrum file.
+* Spectrum Title:	The title of the spectrum.
+* Spectrum Scan Number:	The spectrum scan number.
+* RT:	Retention time
+* m/z:	Measured m/z
+* Measured Charge:	The charge as given in the spectrum file.
+* Identification Charge:	The charge as inferred by the search engine.
+* Theoretical Mass:	The theoretical mass of the peptide.
+* Isotope Number:	The isotope number targeted by the instrument.
+* Precursor m/z Error:	The precursor m/z matching error.
+* Score:	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
+* Confidence:	Confidence in percent associated to the retained PSM.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation: Indicates the validation level of the protein group.
+
+
+Protein Report
+--------------
+
+* Main Accession:	Main accession of the protein group.
+* Description:	Description of the protein designed by the main accession.
+* Gene Name:	The gene names of the Ensembl gene ID associated to the main accession.
+* Chromosome:	The chromosome of the Ensembl gene ID associated to the main accession.
+* PI:	Protein Inference status of the protein group.
+* Secondary Accessions:	Other accessions in the protein group (alphabetical order).
+* Protein Group:	The complete protein group (alphabetical order).
+* #Peptides:	Total number of peptides.
+* #Validated Peptides:	Number of validated peptides.
+* #Unique:	Total number of peptides unique to this protein group.
+* #PSMs:	Number of PSMs
+* #Validated PSMs:	Number of validated PSMs
+* Coverage (%):	Sequence coverage in percent of the protein designed by the main accession.
+* Possible Coverage (%):	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
+* MW (kDa):	Molecular Weight.
+* Spectrum Counting NSAF: 	Normalized Spectrum Abundance Factor (NSAF)
+* Spectrum Counting emPAI:	exponentially modified Protein Abundance Index (emPAI)
+* Confident Modification Sites: Number of Confident Modification Sites	List of the sites where a variable modification was confidently localized.
+* Other Modification Sites: Number of other Modification Sites	List of the non-confident sites where a variable modification was localized.
+* Score:	Score of the protein group.
+* Confidence:	Confidence in percent associated to the protein group.
+* Decoy:	Indicates whether the protein group is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+
+
+Peptide Report
+--------------
+
+
+* Protein(s):	Protein(s) to which this peptide can be attached.
+* AAs Before:	The amino-acids before the sequence.
+* Sequence:	Sequence of the peptide.
+* AAs After:	The amino-acids after the sequence.
+* Modified Sequence:	The peptide sequence annotated with variable modifications.
+* Variable Modifications:	The variable modifications.
+* Localization Confidence:	The confidence in PTMs localization.
+* Fixed Modifications:	The fixed modifications.
+* #Validated PSMs:	Number of validated PSMs.
+* #PSMs:	Number of PSMs.
+* Score:	Score of the peptide.
+* Confidence:	Confidence in percent associated to the peptide.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+
+
+Hierarchical Report
+-------------------
+
+* Main Accession:	Main accession of the protein group.
+* Description:	Description of the protein designed by the main accession.
+* PI:	Protein Inference status of the protein group.
+* Secondary Accessions:	Other accessions in the protein group (alphabetical order).
+* Protein Group:	The complete protein group (alphabetical order).
+* #Peptides:	Total number of peptides.
+* #Validated Peptides:	Number of validated peptides.
+* #Unique:	Total number of peptides unique to this protein group.
+* #PSMs:	Number of PSMs
+* #Validated PSMs:	Number of validated PSMs
+* Coverage (%):	Sequence coverage in percent of the protein designed by the main accession.
+* Possible Coverage (%):	Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
+* MW (kDa):	Molecular Weight.
+* Spectrum Counting NSAF: 	Normalized Spectrum Abundance Factor (NSAF)
+* Spectrum Counting emPAI:	exponentially modified Protein Abundance Index (emPAI)
+* Confident Modification Sites: # Confident Modification Sites	List of the sites where a variable modification was confidently localized.
+* Other Modification Sites: # Other Modification Sites	List of the non-confident sites where a variable modification was localized.
+* Score:	Score of the protein group.
+* Confidence:	Confidence in percent associated to the protein group.
+* Decoy:	Indicates whether the protein group is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+* Protein(s):	Protein(s) to which this peptide can be attached.
+* AAs Before:	The amino-acids before the sequence.
+* Sequence:	Sequence of the peptide.
+* AAs After:	The amino-acids after the sequence.
+* Variable Modifications:	The variable modifications.
+* Localization Confidence:	The confidence in PTMs localization.
+* Fixed Modifications:	The fixed modifications.
+* #Validated PSMs:	Number of validated PSMs.
+* #PSMs:	Number of PSMs.
+* Score:	Score of the peptide.
+* Confidence:	Confidence in percent associated to the peptide.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+* Protein(s):	Protein(s) to which the peptide can be attached.
+* Sequence:	Sequence of the peptide.
+* Modified Sequence:	The peptide sequence annotated with variable modifications.
+* Variable Modifications:	The variable modifications.
+* D-score:	D-score for variable PTM localization.
+* probabilistic PTM score:	The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
+* Localization Confidence:	The confidence in variable PTM localization.
+* Fixed Modifications:	The fixed modifications.
+* Spectrum File:	The spectrum file.
+* Spectrum Title:	The title of the spectrum.
+* Spectrum Scan Number:	The spectrum scan number.
+* RT:	Retention time
+* m/z:	Measured m/z
+* Measured Charge:	The charge as given in the spectrum file.
+* Identification Charge:	The charge as inferred by the search engine.
+* Theoretical Mass:	The theoretical mass of the peptide.
+* Isotope Number:	The isotope number targeted by the instrument.
+* Precursor m/z Error:	The precursor m/z matching error.
+* Score:	Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
+* Confidence:	Confidence in percent associated to the retained PSM.
+* Decoy:	Indicates whether the peptide is a decoy (1: yes, 0: no).
+* Validation:	Indicates the validation level of the protein group.
+
+
+------
+
+**Citation**
+
+To cite the underlying tool (MetaProteomeAnalyzer), please use the publication listed in this tool's citations (DOI: 10.1021/pr501246w); related tools and their papers are listed at http://compomics.github.io
+
+If you use this tool in Galaxy, please cite Chilton J, Ira Cooke, Bjoern Gruening et al.
+    </help>
+    <!-- Single citation for this tool, given as a DOI. -->
+    <citations>
+        <citation type="doi">10.1021/pr501246w</citation>
+    </citations>
+</tool>
author	galaxyp
date	Fri, 03 Mar 2017 11:44:29 -0500
parents
children	b41e6d379c5f