Mercurial > repos > peterjc > secreted_protein_workflow
changeset 0:3a6de5cb858d draft
Uploaded first attempt at a workflow with dependencies on the Tool Shed
author | peterjc |
---|---|
date | Mon, 06 May 2013 11:11:11 -0400 |
parents | |
children | 9f2fea8a5d32 |
files | README.md repository_dependencies.xml secreted_protein_workflow.ga |
diffstat | 3 files changed, 320 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Mon May 06 11:11:11 2013 -0400 @@ -0,0 +1,25 @@ +This Tool Shed Repository contains a workflow for the identification of candidate secreted proteins from a given protein FASTA file. + +It runs SignalP v3.0 and selects only proteins with a strong predicted signal peptide, and then runs TMHMM v2.0 on those, and selects only proteins without a predicted trans-membrane helix. This workflow was used in Kikuchi et al (2011), and is a simplification of the candidate effector protocol described in Jones et al (2009). + +Kikuchi T, Cotton JA, Dalzell JJ, Hasegawa K, Kanzaki N, et al. (2011) Genomic insights into the origin of parasitism in the emerging plant pathogen Bursaphelenchus xylophilus. PLoS Pathog 7: e1002219. +http://dx.doi.org/10.1371/journal.ppat.1002219 + +Jones JT, Kumar A, Pylypenko LA, Thirugnanasambandam A, Castelli L, et al. (2009) Identification and functional characterization of effectors in expressed sequence tags from various life cycle stages of the potato cyst nematode Globodera pallida. Mol Plant Pathol 10: 815–28. +http://dx.doi.org/10.1111/j.1364-3703.2009.00585.x + +Bendtsen JD, Nielsen H, von Heijne G, Brunak S (2004) Improved prediction of signal peptides: SignalP 3.0. J Mol Biol 340: 783–95. +http://dx.doi.org/10.1016/j.jmb.2004.05.028 + +Krogh A, Larsson B, von Heijne G, Sonnhammer E (2001) Predicting transmembrane protein topology with a hidden Markov model: application to complete genomes. J Mol Biol 305: 567–580. +http://dx.doi.org/10.1006/jmbi.2000.4315 + + +Availability +============ + +This workflow is available on the main Galaxy Tool Shed: +http://toolshed.g2.bx.psu.edu/view/peterjc/secreted_protein_workflow + +Development is being done on GitHub here: +https://github.com/peterjc/picobio/tree/master/galaxy_workflows/secreted_protein_workflow
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Mon May 06 11:11:11 2013 -0400 @@ -0,0 +1,7 @@ +<?xml version="1.0"?> +<repositories description="This requires my SignalP and TMHMM wrappers, and my FASTA filtering tool."> + <!-- Revision 15:6abd809cefdd on the main tool shed is v0.2.4, the current latest - but older should be OK --> + <repository toolshed="http://toolshed.g2.bx.psu.edu" name="tmhmm_and_signalp" owner="peterjc" changeset_revision="6abd809cefdd" /> + <!-- Revision 2:abdd608c869b on the main tool shed is v0.0.5, the current latest - but older should be OK --> + <repository toolshed="http://toolshed.g2.bx.psu.edu" name="seq_filter_by_id" owner="peterjc" changeset_revision="abdd608c869b" /> +</repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/secreted_protein_workflow.ga Mon May 06 11:11:11 2013 -0400 @@ -0,0 +1,288 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "Runs SignalP v3.0 and TMHMM v2.0 to look for secreted proteins.", + "format-version": "0.1", + "name": "Find secreted proteins with TMHMM and SignalP", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Input Dataset" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 200, + "top": 200 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"Input Dataset\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": { + "fasta_file": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SignalP 3.0", + "name": "organism" + } + ], + "name": "SignalP 3.0", + "outputs": [ + { + "name": "tabular_file", + "type": "tabular" + } + ], + "position": { + "left": 240, + "top": 341 + }, + "post_job_actions": { + "HideDatasetActiontabular_file": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "tabular_file" + } + }, + "tool_errors": null, + "tool_id": "signalp3", + "tool_state": "{\"organism\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"fasta_file\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"truncate\": \"\\\"60\\\"\", \"__page__\": 0}", + "tool_version": "0.0.8", + "type": "tool", + "user_outputs": [] + }, + "2": { + "annotation": "Select proteins with predicted signal peptide (SignalP NN D-Score or HMM)", + "id": 2, + "input_connections": { + "input": { + "id": 1, + "output_name": "tabular_file" + } + }, + "inputs": [], + "name": "Filter", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 323, + "top": 528 + 
}, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + }, + "RenameDatasetActionout_file1": { + "action_arguments": { + "newname": "Filtered SignalP results" + }, + "action_type": "RenameDatasetAction", + "output_name": "out_file1" + } + }, + "tool_errors": null, + "tool_id": "Filter1", + "tool_state": "{\"__page__\": 0, \"cond\": \"\\\"c14=='Y' or c15=='S'\\\"\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input\": \"null\"}", + "tool_version": "1.1.0", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "Select those sequences with signal peptides.", + "id": 3, + "input_connections": { + "input_file": { + "id": 0, + "output_name": "output" + }, + "input_tabular": { + "id": 2, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Filter sequences by ID", + "outputs": [ + { + "name": "output_pos", + "type": "fasta" + }, + { + "name": "output_neg", + "type": "fasta" + } + ], + "position": { + "left": 527, + "top": 200 + }, + "post_job_actions": { + "HideDatasetActionoutput_neg": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_neg" + }, + "HideDatasetActionoutput_pos": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_pos" + } + }, + "tool_errors": null, + "tool_id": "seq_filter_by_id", + "tool_state": "{\"__page__\": 0, \"output_choice_cond\": \"{\\\"output_choice\\\": \\\"pos\\\", \\\"__current_case__\\\": 1}\", \"input_file\": \"null\", \"input_tabular\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"columns\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\"}", + "tool_version": "0.0.1", + "type": "tool", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "fasta_file": { + "id": 3, + "output_name": 
"output_pos" + } + }, + "inputs": [], + "name": "TMHMM 2.0", + "outputs": [ + { + "name": "tabular_file", + "type": "tabular" + } + ], + "position": { + "left": 643, + "top": 443 + }, + "post_job_actions": { + "HideDatasetActiontabular_file": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "tabular_file" + } + }, + "tool_errors": null, + "tool_id": "tmhmm2", + "tool_state": "{\"__page__\": 0, \"fasta_file\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "0.0.7", + "type": "tool", + "user_outputs": [] + }, + "5": { + "annotation": "Select proteins with no predicted transmembrane helices.", + "id": 5, + "input_connections": { + "input": { + "id": 4, + "output_name": "tabular_file" + } + }, + "inputs": [], + "name": "Filter", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 729, + "top": 566 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + }, + "RenameDatasetActionout_file1": { + "action_arguments": { + "newname": "Filtered TMHMM results" + }, + "action_type": "RenameDatasetAction", + "output_name": "out_file1" + } + }, + "tool_errors": null, + "tool_id": "Filter1", + "tool_state": "{\"__page__\": 0, \"cond\": \"\\\"c5== 0\\\"\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input\": \"null\"}", + "tool_version": "1.1.0", + "type": "tool", + "user_outputs": [] + }, + "6": { + "annotation": "Select those sequences with no transmembrane helices (from those with signal peptides).", + "id": 6, + "input_connections": { + "input_file": { + "id": 3, + "output_name": "output_pos" + }, + "input_tabular": { + "id": 5, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Filter sequences by ID", + "outputs": [ + { + "name": "output_pos", + "type": "fasta" + }, + { + "name": 
"output_neg", + "type": "fasta" + } + ], + "position": { + "left": 893, + "top": 281 + }, + "post_job_actions": { + "HideDatasetActionoutput_neg": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_neg" + }, + "RenameDatasetActionoutput_pos": { + "action_arguments": { + "newname": "Secreted proteins" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_pos" + } + }, + "tool_errors": null, + "tool_id": "seq_filter_by_id", + "tool_state": "{\"__page__\": 0, \"output_choice_cond\": \"{\\\"output_choice\\\": \\\"pos\\\", \\\"__current_case__\\\": 1}\", \"input_file\": \"null\", \"input_tabular\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"columns\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\"}", + "tool_version": "0.0.1", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file