comparison peptide_shaker.xml @ 0:8b99cb00e1c4 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/peptideshaker commit 3c83b8f8ca60d6399bc5dd7096b830ca1f1c457e-dirty
author jjohnson
date Tue, 15 May 2018 14:50:35 -0400
parents
children fa76abf69433
comparison
equal deleted inserted replaced
-1:000000000000 0:8b99cb00e1c4
1 <tool id="peptide_shaker" name="Peptide Shaker" version="1.16.22.0">
2 <description>
3 Perform protein identification using various search engines based on results from SearchGUI
4 </description>
5 <macros>
6 <import>macros_basic.xml</import>
7 </macros>
8 <requirements>
9 <requirement type="package" version="1.16.22">peptide-shaker</requirement>
10 </requirements>
11 <expand macro="stdio" />
12 <command>
13 <![CDATA[
14 #from datetime import datetime
15 #set $exp_str = "Galaxy_Experiment_%s" % datetime.now().strftime("%Y%m%d%H%M%S")
16 #set $samp_str = "Sample_%s" % datetime.now().strftime("%Y%m%d%H%M%S")
17 #set $temp_stderr = "peptideshaker_stderr"
18 #set $bin_dir = "bin"
19 ## exp_str/samp_str end in a YYYYmmddHHMMSS timestamp; %S is seconds (lowercase %s is not a portable strftime code).
20
21 mkdir output_reports;
22 cwd=`pwd`;
23 ## Link the SearchGUI archive under a fixed name and extract only the identification parameters file from it.
24 ln -s '$searchgui_input' searchgui_input.zip &&
25 jar xvf searchgui_input.zip SEARCHGUI_IdentificationParameters.par &&
26 ######################
27 ## PeptideShakerCLI ##
28 ######################
29 (peptide-shaker eu.isas.peptideshaker.cmd.PeptideShakerCLI
30 --exec_dir="\$cwd/${bin_dir}"
31 -temp_folder \$cwd/PeptideShakerCLI
32 -experiment '$exp_str'
33 -sample '$samp_str'
34 -replicate 1
35 -identification_files \$cwd/searchgui_input.zip
36 -id_params \$cwd/SEARCHGUI_IdentificationParameters.par
37 -out \$cwd/peptideshaker_output.cpsx
38 -zip \$cwd/peptideshaker_output.zip
39 #set $cleaned_list = str($outputs).split(',')
40 #if 'cps' in $cleaned_list:
41 #silent $cleaned_list.remove('cps')
42 #end if
43 #if 'mzidentML' in $cleaned_list:
44 #silent $cleaned_list.remove('mzidentML')
45 #end if
46 #if 'zip' in $cleaned_list:
47 #silent $cleaned_list.remove('zip')
48 #end if
49 #if len($cleaned_list) > 0
50 ## Only numbers are left over. These correspond to different reports.
51 -reports #echo ','.join($cleaned_list)#
52 #end if
53 ## Thread count comes from GALAXY_SLOTS, falling back to 12 when it is unset or empty.
54 -threads "\${GALAXY_SLOTS:-12}"
55
56
57
58 2>> $temp_stderr)
59
60 &&
61
62 echo "Running Reports";
63
64 ##################################
65 ## PeptideShaker Report options ##
66 ##################################
67
68 #if 'mzidentML' in str($outputs).split(','):
69 echo "Generating mzIdentML";
70 (peptide-shaker eu.isas.peptideshaker.cmd.MzidCLI
71 --exec_dir="\$cwd/${bin_dir}"
72 -in \$cwd/peptideshaker_output.zip
73 -output_file \$cwd/output.mzid -include_sequences $include_sequences
74 #if $contact_options.contact_options_selector == "yes":
75 -contact_first_name "$contact_options.contact_first_name"
76 -contact_last_name "$contact_options.contact_last_name"
77 -contact_email "$contact_options.contact_email"
78 -contact_address "$contact_options.contact_address"
79 #if str($contact_options.contact_url).strip() != '':
80 -contact_url "$contact_options.contact_url"
81 #end if
82 -organization_name "$contact_options.organization_name"
83 -organization_email "$contact_options.organization_email"
84 -organization_address "$contact_options.organization_address"
85 #if str($contact_options.organization_url).strip() != '':
86 -organization_url "$contact_options.organization_url"
87 #end if
88 #else:
89 -contact_first_name "Proteomics"
90 -contact_last_name "Galaxy"
91 -contact_email "galaxyp@umn.edu"
92 -contact_address "galaxyp@umn.edu"
93 -organization_name "University of Minnesota"
94 -organization_email "galaxyp@umn.edu"
95 -organization_address "Minneapolis, MN 55455, Vereinigte Staaten"
96 #end if
97 2>> $temp_stderr)
98 &&
99 #end if
100
101 ## Generate Reports if the user has selected any of the 9 additional reports (values 0-8)
102 ## 'cps', 'mzidentML' and 'zip' are not valid options for PeptideShaker
103 ## and will not be passed to the command line
104 #if set(["0","1","2","3","4","5","6","7","8"]).intersection( set( str( $outputs ).split(',') ) ):
105 ## '8' (Extended PSM Report) must be in the set above; otherwise selecting only that report skips ReportCLI and psmx.txt is never produced.
106 (peptide-shaker eu.isas.peptideshaker.cmd.ReportCLI
107 --exec_dir="\$cwd/${bin_dir}"
108 -temp_folder \$cwd/ReportCLI
109 -in \$cwd/peptideshaker_output.zip
110 -out_reports \$cwd/output_reports
111 #set $cleaned_list = str($outputs).split(',')
112 #if 'cps' in $cleaned_list:
113 #silent $cleaned_list.remove('cps')
114 #end if
115 #if 'mzidentML' in $cleaned_list:
116 #silent $cleaned_list.remove('mzidentML')
117 #end if
118 #if 'zip' in $cleaned_list:
119 #silent $cleaned_list.remove('zip')
120 #end if
121 ## Only numbers are left over. These correspond to different reports.
122 -reports #echo ','.join($cleaned_list)#
123
124 2>> $temp_stderr)
125 &&
126 #end if
127
128 ## # ls -l \$cwd/output_reports/* ;
129 ## Move each requested report to the fixed file name its from_work_dir output expects. '8' is handled before '3', presumably because '*PSM_Report*' also matches the Extended_PSM_Report file.
130 #if '0' in str($outputs).split(','):
131 find \$cwd/output_reports -name '*Certificate_of_Analysis*' -exec bash -c 'mv "$0" "certificate.txt"' {} \;
132 ;
133 #end if
134 #if '1' in str($outputs).split(','):
135 find \$cwd/output_reports -name '*Hierarchical*' -exec bash -c 'mv "$0" "hierarchical.txt"' {} \;
136 ;
137 #end if
138 #if '2' in str($outputs).split(','):
139 find \$cwd/output_reports -name '*PSM_Phosphorylation_Report*' -exec bash -c 'mv "$0" "psm_phospho.txt"' {} \;
140 ;
141 #end if
142 #if '8' in str($outputs).split(','):
143 find \$cwd/output_reports -name '*Extended_PSM_Report*' -exec bash -c 'mv "$0" "psmx.txt"' {} \;
144 ;
145 #end if
146 #if '3' in str($outputs).split(','):
147 find \$cwd/output_reports -name '*PSM_Report*' -exec bash -c 'mv "$0" "psm.txt"' {} \;
148 ;
149 #end if
150 #if '4' in str($outputs).split(','):
151 find \$cwd/output_reports -name '*Peptide_Phosphorylation_Report*' -exec bash -c 'mv "$0" "peptides_phospho.txt"' {} \;
152 ;
153 #end if
154 #if '5' in str($outputs).split(','):
155 find \$cwd/output_reports -name '*Peptide_Report*' -exec bash -c 'mv "$0" "peptides.txt"' {} \;
156 ;
157 #end if
158 #if '6' in str($outputs).split(','):
159 find \$cwd/output_reports -name '*Protein_Phosphorylation_Report*' -exec bash -c 'mv "$0" "proteins_phospho.txt"' {} \;
160 ;
161 #end if
162 #if '7' in str($outputs).split(','):
163 find \$cwd/output_reports -name '*Protein_Report*' -exec bash -c 'mv "$0" "proteins.txt"' {} \;
164 ;
165 #end if
166 ## Record the status of the last command above, copy the collected stderr file to stdout, then exit with the recorded status.
167 exit_code_for_galaxy=\$?;
168 cat $temp_stderr 2>&1;
169 (exit \$exit_code_for_galaxy)
170
171 ]]>
172 </command>
173 <inputs>
174 <param name="searchgui_input" format="searchgui_archive" type="data" label="Compressed SearchGUI results"
175 help="SearchGUI Results from History">
176 <options options_filter_attribute="metadata.searchgui_major_version" >
177 <filter type="add_value" value="@SEARCHGUI_MAJOR_VERSION@" />
178 </options>
179 </param>
180
181
182 <conditional name="contact_options">
183 <param name="contact_options_selector" type="select" label="Specify Contact Information for mzIdentML"
184 help="Create a Galaxy workflow to save these values">
185 <option value="no" selected="True">GalaxyP Project contact (Not suitable for PRIDE submission)</option>
186 <option value="yes">Specify Contact Information</option>
187 </param>
188 <when value="no" />
189 <when value="yes">
190 <param name="contact_first_name" type="text" value="" label="Contact first name.">
191 <validator type="regex" message="Contact first name is required">\S+.*</validator>
192 </param>
193 <param name="contact_last_name" type="text" value="" label="Contact last name.">
194 <validator type="regex" message="Contact last name is required">\S+.*</validator>
195 </param>
196 <param name="contact_email" type="text" value="" label="Contact e-mail.">
197 <validator type="regex" message="Enter a valid contact e-mail address">\S+@\S+</validator>
198 </param>
199 <param name="contact_address" type="text" value="" label="Contact address.">
200 <validator type="regex" message="Contact address is required">\S+.*</validator>
201 </param>
202 <param name="contact_url" type="text" value="" optional="true" label="Contact URL.">
203 </param>
204 <param name="organization_name" type="text" value="" label="Organization name.">
205 <validator type="regex" message="Organization name is required">\S+.*</validator>
206 </param>
207 <param name="organization_email" type="text" value="" label="Organization e-mail.">
208 <validator type="regex" message="Enter a valid organization e-mail address">\S+@\S+</validator>
209 </param>
210 <param name="organization_address" type="text" value="" label="Organization address.">
211 <validator type="regex" message="Organization address is required">\S+.*</validator>
212 </param>
213 <param name="organization_url" type="text" value="" optional="true" label="Organization URL.">
214 </param>
215 </when>
216 </conditional>
217
218 <param name="include_sequences" type="boolean" truevalue="1" falsevalue="0" checked="false" label="Include the protein sequences in mzIdentML" />
219 <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output options">
220 <option value="zip">Zip File for import to Desktop App</option>
221 <option value="mzidentML" selected="True">mzidentML File</option>
222 <option value="3">PSM Report</option>
223 <option value="8">Extended PSM Report</option>
224 <option value="2">PSM Phosphorylation Report</option>
225 <option value="5">Peptide Report</option>
226 <option value="4">Peptide Phosphorylation Report</option>
227 <option value="7">Protein Report</option>
228 <option value="6">Protein Phosphorylation Report</option>
229 <option value="0">Certificate of Analysis</option>
230 <option value="1">Hierarchical Report</option>
231 <option value="cps">CPS file</option>
232 <validator type="no_options" message="Please select at least one output file" />
233 </param>
234
235 </inputs>
236 <outputs>
237 <data format="mzid" name="mzidentML" from_work_dir="output.mzid" label="${tool.name} on ${on_string}: mzidentML file">
238 <filter>'mzidentML' in outputs</filter>
239 </data>
240 <data format="peptideshaker_archive" name="output_cps" from_work_dir="peptideshaker_output.cpsx" label="${tool.name} on ${on_string}: CPS file">
241 <filter>'cps' in outputs</filter>
242 </data>
243 <data format="zip" name="output_zip" from_work_dir="peptideshaker_output.zip" label="${tool.name} on ${on_string}: Archive">
244 <filter>'zip' in outputs</filter>
245 </data>
246 <data format="txt" name="output_certificate" from_work_dir="certificate.txt" label="${tool.name} on ${on_string}: Parameters">
247 <filter>'0' in outputs</filter>
248 </data>
249 <data format="tabular" name="output_hierarchical" from_work_dir="hierarchical.txt" label="${tool.name} on ${on_string}: Hierarchical Report">
250 <filter>'1' in outputs</filter>
251 </data>
252 <data format="tabular" name="output_psm_phosphorylation" from_work_dir="psm_phospho.txt" label="${tool.name} on ${on_string}: PSM Phosphorylation Report">
253 <filter>'2' in outputs</filter>
254 </data>
255 <data format="tabular" name="output_psm" from_work_dir="psm.txt" label="${tool.name} on ${on_string}: PSM Report">
256 <filter>'3' in outputs</filter>
257 </data>
258 <data format="tabular" name="output_extended_psm" from_work_dir="psmx.txt" label="${tool.name} on ${on_string}: Extended PSM Report">
259 <filter>'8' in outputs</filter>
260 </data>
261 <data format="tabular" name="output_peptides_phosphorylation" from_work_dir="peptides_phospho.txt" label="${tool.name} on ${on_string}: Peptide Phosphorylation Report">
262 <filter>'4' in outputs</filter>
263 </data>
264 <data format="tabular" name="output_peptides" from_work_dir="peptides.txt" label="${tool.name} on ${on_string}: Peptide Report">
265 <filter>'5' in outputs</filter>
266 </data>
267 <data format="tabular" name="output_proteins_phosphorylation" from_work_dir="proteins_phospho.txt" label="${tool.name} on ${on_string}: Protein Phosphorylation Report">
268 <filter>'6' in outputs</filter>
269 </data>
270 <data format="tabular" name="output_proteins" from_work_dir="proteins.txt" label="${tool.name} on ${on_string}: Protein Report">
271 <filter>'7' in outputs</filter>
272 </data>
273 </outputs>
274 <tests>
275 <test>
276 <param name="searchgui_input" value="tiny_searchgui_result1.zip" ftype="searchgui_archive"/>
279 <param name="outputs" value="zip,3"/>
280 <output name="output_zip" file="peptide_shaker_result1.zip" ftype="zip" compare="sim_size" delta="3000" />
281 <output name="output_psm">
282 <assert_contents>
283 <has_text text="cds.comp41779_c0_seq1" />
284 </assert_contents>
285 </output>
286 </test>
287 <test>
288 <param name="searchgui_input" value="tiny_searchgui_result1.zip" ftype="searchgui_archive"/>
292 <param name="outputs" value="0,1,2,3,4,5,6,7"/>
293 <output name="output_certificate">
294 <assert_contents>
295 <has_text text="Tolerance: 100" />
296 <has_text text="Carbamidomethylation of C" />
297 </assert_contents>
298 </output>
299 <output name="output_hierarchical">
300 <assert_contents>
301 <has_text_matching expression="1.1\tcds.comp" />
302 </assert_contents>
303 </output>
304 <output name="output_psm">
305 <assert_contents>
306 <not_has_text text="Phosphosite" />
307 <has_text text="cds.comp41779_c0_seq1" />
308 </assert_contents>
309 </output>
310 <output name="output_psm_phosphorylation">
311 <assert_contents>
312 <has_text text="Phosphosite" />
313 <has_text text="cds.comp41779_c0_seq1" />
314 </assert_contents>
315 </output>
316 <output name="output_peptides">
317 <assert_contents>
318 <has_text text="Modification" />
319 <has_text text="cds.comp41779_c0_seq1" />
320 </assert_contents>
321 </output>
322 <output name="output_peptides_phosphorylation">
323 <assert_contents>
324 <has_text text="Phosphosite" />
325 <has_text text="cds.comp41779_c0_seq1" />
326 </assert_contents>
327 </output>
328 <output name="output_proteins">
329 <assert_contents>
330 <has_text text="Modification" />
331 <has_text text="cds.comp41779_c0_seq1" />
332 </assert_contents>
333 </output>
334 <output name="output_proteins_phosphorylation">
335 <assert_contents>
336 <has_text text="Phosphosite" />
337 <has_text text="cds.comp41779_c0_seq1" />
338 </assert_contents>
339 </output>
340 </test>
341 <test>
342 <param name="searchgui_input" value="tiny_searchgui_result1.zip" ftype="searchgui_archive"/>
345 <param name="outputs" value="3,cps"/>
346 <output name="output_cps" file="peptide_shaker_result1.cpsx" ftype="peptideshaker_archive" compare="sim_size" delta="3000"/>
347 <output name="output_psm">
348 <assert_contents>
349 <has_text text="cds.comp41779_c0_seq1" />
350 </assert_contents>
351 </output>
352 </test>
353 </tests>
354 <help>
355 **What it does**
356
357 Combines search results given by SearchGUI after running multiple search engines (X! Tandem, OMSSA and MS-GF+) on any number of MGF peak lists.
358
359 http://compomics.github.io/projects/peptide-shaker.html
360
361 http://compomics.github.io/projects/searchgui.html
362
363 ----
364
365 Reports
366 =======
367
368
369 PSM Report
370 ----------
371
372 * Protein(s): Protein(s) to which the peptide can be attached
373 * Sequence: Sequence of the peptide
374 * Variable Modifications: The variable modifications
375 * D-score: D-score for variable PTM localization
376 * probabilistic PTM score: The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
377 * Localization Confidence: The confidence in variable PTM localization.
378 * Fixed Modifications: The fixed modifications.
379 * Spectrum File: The spectrum file.
380 * Spectrum Title: The title of the spectrum.
381 * Spectrum Scan Number: The spectrum scan number.
382 * RT: Retention time
383 * m/z: Measured m/z
384 * Measured Charge: The charge as given in the spectrum file.
385 * Identification Charge: The charge as inferred by the search engine.
386 * Theoretical Mass: The theoretical mass of the peptide.
387 * Isotope Number: The isotope number targeted by the instrument.
388 * Precursor m/z Error: The precursor m/z matching error.
389 * Score: Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
390 * Confidence: Confidence in percent associated to the retained PSM.
391 * Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no).
392 * Validation: Indicates the validation level of the protein group.
393
394
395 Protein Report
396 --------------
397
398 * Main Accession: Main accession of the protein group.
399 * Description: Description of the protein designed by the main accession.
400 * Gene Name: The gene names of the Ensembl gene ID associated to the main accession.
401 * Chromosome: The chromosome of the Ensembl gene ID associated to the main accession.
402 * PI: Protein Inference status of the protein group.
403 * Secondary Accessions: Other accessions in the protein group (alphabetical order).
404 * Protein Group: The complete protein group (alphabetical order).
405 * #Peptides: Total number of peptides.
406 * #Validated Peptides: Number of validated peptides.
407 * #Unique: Total number of peptides unique to this protein group.
408 * #PSMs: Number of PSMs
409 * #Validated PSMs: Number of validated PSMs
410 * Coverage (%): Sequence coverage in percent of the protein designed by the main accession.
411 * Possible Coverage (%): Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
412 * MW (kDa): Molecular Weight.
413 * Spectrum Counting NSAF: Normalized Spectrum Abundance Factor (NSAF)
414 * Spectrum Counting emPAI: exponentially modified Protein Abundance Index (emPAI)
415 * Confident Modification Sites: Number of Confident Modification Sites List of the sites where a variable modification was confidently localized.
416 * Other Modification Sites: Number of other Modification Sites List of the non-confident sites where a variable modification was localized.
417 * Score: Score of the protein group.
418 * Confidence: Confidence in percent associated to the protein group.
419 * Decoy: Indicates whether the protein group is a decoy (1: yes, 0: no).
420 * Validation: Indicates the validation level of the protein group.
421
422
423 Peptide Report
424 --------------
425
426
427 * Protein(s): Protein(s) to which this peptide can be attached.
428 * AAs Before: The amino-acids before the sequence.
429 * Sequence: Sequence of the peptide.
430 * AAs After: The amino-acids after the sequence.
431 * Modified Sequence: The peptide sequence annotated with variable modifications.
432 * Variable Modifications: The variable modifications.
433 * Localization Confidence: The confidence in PTMs localization.
434 * Fixed Modifications: The fixed modifications.
435 * #Validated PSMs: Number of validated PSMs.
436 * #PSMs: Number of PSMs.
437 * Score: Score of the peptide.
438 * Confidence: Confidence in percent associated to the peptide.
439 * Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no).
440 * Validation: Indicates the validation level of the protein group.
441
442
443 Hierarchical Report
444 -------------------
445
446 * Main Accession: Main accession of the protein group.
447 * Description: Description of the protein designed by the main accession.
448 * PI: Protein Inference status of the protein group.
449 * Secondary Accessions: Other accessions in the protein group (alphabetical order).
450 * Protein Group: The complete protein group (alphabetical order).
451 * #Peptides: Total number of peptides.
452 * #Validated Peptides: Number of validated peptides.
453 * #Unique: Total number of peptides unique to this protein group.
454 * #PSMs: Number of PSMs
455 * #Validated PSMs: Number of validated PSMs
456 * Coverage (%): Sequence coverage in percent of the protein designed by the main accession.
457 * Possible Coverage (%): Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
458 * MW (kDa): Molecular Weight.
459 * Spectrum Counting NSAF: Normalized Spectrum Abundance Factor (NSAF)
460 * Spectrum Counting emPAI: exponentially modified Protein Abundance Index (emPAI)
461 * Confident Modification Sites: # Confident Modification Sites List of the sites where a variable modification was confidently localized.
462 * Other Modification Sites: # Other Modification Sites List of the non-confident sites where a variable modification was localized.
463 * Score: Score of the protein group.
464 * Confidence: Confidence in percent associated to the protein group.
465 * Decoy: Indicates whether the protein group is a decoy (1: yes, 0: no).
466 * Validation: Indicates the validation level of the protein group.
467 * Protein(s): Protein(s) to which this peptide can be attached.
468 * AAs Before: The amino-acids before the sequence.
469 * Sequence: Sequence of the peptide.
470 * AAs After: The amino-acids after the sequence.
471 * Variable Modifications: The variable modifications.
472 * Localization Confidence: The confidence in PTMs localization.
473 * Fixed Modifications: The fixed modifications.
474 * #Validated PSMs: Number of validated PSMs.
475 * #PSMs: Number of PSMs.
476 * Score: Score of the peptide.
477 * Confidence: Confidence in percent associated to the peptide.
478 * Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no).
479 * Validation: Indicates the validation level of the protein group.
480 * Protein(s): Protein(s) to which the peptide can be attached.
481 * Sequence: Sequence of the peptide.
482 * Modified Sequence: The peptide sequence annotated with variable modifications.
483 * Variable Modifications: The variable modifications.
484 * D-score: D-score for variable PTM localization.
485 * probabilistic PTM score: The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
486 * Localization Confidence: The confidence in variable PTM localization.
487 * Fixed Modifications: The fixed modifications.
488 * Spectrum File: The spectrum file.
489 * Spectrum Title: The title of the spectrum.
490 * Spectrum Scan Number: The spectrum scan number.
491 * RT: Retention time
492 * m/z: Measured m/z
493 * Measured Charge: The charge as given in the spectrum file.
494 * Identification Charge: The charge as inferred by the search engine.
495 * Theoretical Mass: The theoretical mass of the peptide.
496 * Isotope Number: The isotope number targeted by the instrument.
497 * Precursor m/z Error: The precursor m/z matching error.
498 * Score: Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
499 * Confidence: Confidence in percent associated to the retained PSM.
500 * Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no).
501 * Validation: Indicates the validation level of the protein group.
502
503
504 ------
505
506 **Citation**
507
508 To cite the underlying tools (PeptideShaker and SearchGUI) please refer to the list of papers at http://compomics.github.io
509
510 If you use this tool in Galaxy, please cite Chilton J, Ira Cooke, Bjoern Gruening et al.
511 </help>
512 <expand macro="citations" />
513 </tool>