changeset 0:3a6de5cb858d draft

Uploaded first attempt at a workflow with dependencies on the Tool Shed
author peterjc
date Mon, 06 May 2013 11:11:11 -0400
parents
children 9f2fea8a5d32
files README.md repository_dependencies.xml secreted_protein_workflow.ga
diffstat 3 files changed, 320 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Mon May 06 11:11:11 2013 -0400
@@ -0,0 +1,25 @@
+This Tool Shed Repository contains a workflow for the identification of candidate secreted proteins from a given protein FASTA file.
+
+It runs SignalP v3.0 and selects only proteins with a strong predicted signal peptide, and then runs TMHMM v2.0 on those, and selects only proteins without a predicted trans-membrane helix. This workflow was used in Kikuchi et al (2011), and is a simplification of the candidate effector protocol described in Jones et al (2009).
+
+Kikuchi T, Cotton JA, Dalzell JJ, Hasegawa K, Kanzaki N, et al. (2011) Genomic insights into the origin of parasitism in the emerging plant pathogen Bursaphelenchus xylophilus. PLoS Pathog 7: e1002219.
+http://dx.doi.org/10.1371/journal.ppat.1002219
+
+Jones JT, Kumar A, Pylypenko LA, Thirugnanasambandam A, Castelli L, et al. (2009) Identification and functional characterization of effectors in expressed sequence tags from various life cycle stages of the potato cyst nematode Globodera pallida. Mol Plant Pathol 10: 815–28.
+http://dx.doi.org/10.1111/j.1364-3703.2009.00585.x
+
+Bendtsen JD, Nielsen H, von Heijne G, Brunak S (2004) Improved prediction of signal peptides: SignalP 3.0. J Mol Biol 340: 783–95.
+http://dx.doi.org/10.1016/j.jmb.2004.05.028
+
+Krogh A, Larsson B, von Heijne G, Sonnhammer E (2001) Predicting transmembrane protein topology with a hidden Markov model: application to complete genomes. J Mol Biol 305: 567–580.
+http://dx.doi.org/10.1006/jmbi.2000.4315
+
+
+Availability
+============
+
+This workflow is available on the main Galaxy Tool Shed:
+http://toolshed.g2.bx.psu.edu/view/peterjc/secreted_protein_workflow
+
+Development is being done on GitHub here:
+https://github.com/peterjc/picobio/tree/master/galaxy_workflows/secreted_protein_workflow
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Mon May 06 11:11:11 2013 -0400
@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<repositories description="This requires my SignalP and TMHMM wrappers, and my FASTA filtering tool.">
+    <!-- Revision 15:6abd809cefdd on the main tool shed is v0.2.4, the current latest - but older should be OK -->
+    <repository toolshed="http://toolshed.g2.bx.psu.edu" name="tmhmm_and_signalp" owner="peterjc" changeset_revision="6abd809cefdd" />
+    <!-- Revision 2:abdd608c869b on the main tool shed is v0.0.5, the current latest - but older should be OK -->
+    <repository toolshed="http://toolshed.g2.bx.psu.edu" name="seq_filter_by_id" owner="peterjc" changeset_revision="abdd608c869b" />
+</repositories>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/secreted_protein_workflow.ga	Mon May 06 11:11:11 2013 -0400
@@ -0,0 +1,288 @@
+{
+    "a_galaxy_workflow": "true", 
+    "annotation": "Runs SignalP v3.0 and TMHMM v2.0 to look for secreted proteins.", 
+    "format-version": "0.1", 
+    "name": "Find secreted proteins with TMHMM and SignalP", 
+    "steps": {
+        "0": {
+            "annotation": "", 
+            "id": 0, 
+            "input_connections": {}, 
+            "inputs": [
+                {
+                    "description": "", 
+                    "name": "Input Dataset"
+                }
+            ], 
+            "name": "Input dataset", 
+            "outputs": [], 
+            "position": {
+                "left": 200, 
+                "top": 200
+            }, 
+            "tool_errors": null, 
+            "tool_id": null, 
+            "tool_state": "{\"name\": \"Input Dataset\"}", 
+            "tool_version": null, 
+            "type": "data_input", 
+            "user_outputs": []
+        }, 
+        "1": {
+            "annotation": "", 
+            "id": 1, 
+            "input_connections": {
+                "fasta_file": {
+                    "id": 0, 
+                    "output_name": "output"
+                }
+            }, 
+            "inputs": [
+                {
+                    "description": "runtime parameter for tool SignalP 3.0", 
+                    "name": "organism"
+                }
+            ], 
+            "name": "SignalP 3.0", 
+            "outputs": [
+                {
+                    "name": "tabular_file", 
+                    "type": "tabular"
+                }
+            ], 
+            "position": {
+                "left": 240, 
+                "top": 341
+            }, 
+            "post_job_actions": {
+                "HideDatasetActiontabular_file": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "tabular_file"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "signalp3", 
+            "tool_state": "{\"organism\": \"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"fasta_file\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"truncate\": \"\\\"60\\\"\", \"__page__\": 0}", 
+            "tool_version": "0.0.8", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "2": {
+            "annotation": "Select proteins with predicted signal peptide (SignalP NN D-Score or HMM)", 
+            "id": 2, 
+            "input_connections": {
+                "input": {
+                    "id": 1, 
+                    "output_name": "tabular_file"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Filter", 
+            "outputs": [
+                {
+                    "name": "out_file1", 
+                    "type": "input"
+                }
+            ], 
+            "position": {
+                "left": 323, 
+                "top": 528
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionout_file1": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "out_file1"
+                }, 
+                "RenameDatasetActionout_file1": {
+                    "action_arguments": {
+                        "newname": "Filtered SignalP results"
+                    }, 
+                    "action_type": "RenameDatasetAction", 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "Filter1", 
+            "tool_state": "{\"__page__\": 0, \"cond\": \"\\\"c14=='Y' or c15=='S'\\\"\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input\": \"null\"}", 
+            "tool_version": "1.1.0", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "3": {
+            "annotation": "Select those sequences with signal peptides.", 
+            "id": 3, 
+            "input_connections": {
+                "input_file": {
+                    "id": 0, 
+                    "output_name": "output"
+                }, 
+                "input_tabular": {
+                    "id": 2, 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Filter sequences by ID", 
+            "outputs": [
+                {
+                    "name": "output_pos", 
+                    "type": "fasta"
+                }, 
+                {
+                    "name": "output_neg", 
+                    "type": "fasta"
+                }
+            ], 
+            "position": {
+                "left": 527, 
+                "top": 200
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionoutput_neg": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "output_neg"
+                }, 
+                "HideDatasetActionoutput_pos": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "output_pos"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "seq_filter_by_id", 
+            "tool_state": "{\"__page__\": 0, \"output_choice_cond\": \"{\\\"output_choice\\\": \\\"pos\\\", \\\"__current_case__\\\": 1}\", \"input_file\": \"null\", \"input_tabular\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"columns\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\"}", 
+            "tool_version": "0.0.1", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "4": {
+            "annotation": "", 
+            "id": 4, 
+            "input_connections": {
+                "fasta_file": {
+                    "id": 3, 
+                    "output_name": "output_pos"
+                }
+            }, 
+            "inputs": [], 
+            "name": "TMHMM 2.0", 
+            "outputs": [
+                {
+                    "name": "tabular_file", 
+                    "type": "tabular"
+                }
+            ], 
+            "position": {
+                "left": 643, 
+                "top": 443
+            }, 
+            "post_job_actions": {
+                "HideDatasetActiontabular_file": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "tabular_file"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "tmhmm2", 
+            "tool_state": "{\"__page__\": 0, \"fasta_file\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", 
+            "tool_version": "0.0.7", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "5": {
+            "annotation": "Select proteins with no predicted transmembrane helices.", 
+            "id": 5, 
+            "input_connections": {
+                "input": {
+                    "id": 4, 
+                    "output_name": "tabular_file"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Filter", 
+            "outputs": [
+                {
+                    "name": "out_file1", 
+                    "type": "input"
+                }
+            ], 
+            "position": {
+                "left": 729, 
+                "top": 566
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionout_file1": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "out_file1"
+                }, 
+                "RenameDatasetActionout_file1": {
+                    "action_arguments": {
+                        "newname": "Filtered TMHMM results"
+                    }, 
+                    "action_type": "RenameDatasetAction", 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "Filter1", 
+            "tool_state": "{\"__page__\": 0, \"cond\": \"\\\"c5== 0\\\"\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input\": \"null\"}", 
+            "tool_version": "1.1.0", 
+            "type": "tool", 
+            "user_outputs": []
+        }, 
+        "6": {
+            "annotation": "Select those sequences with no transmembrane helices (from those with signal peptides).", 
+            "id": 6, 
+            "input_connections": {
+                "input_file": {
+                    "id": 3, 
+                    "output_name": "output_pos"
+                }, 
+                "input_tabular": {
+                    "id": 5, 
+                    "output_name": "out_file1"
+                }
+            }, 
+            "inputs": [], 
+            "name": "Filter sequences by ID", 
+            "outputs": [
+                {
+                    "name": "output_pos", 
+                    "type": "fasta"
+                }, 
+                {
+                    "name": "output_neg", 
+                    "type": "fasta"
+                }
+            ], 
+            "position": {
+                "left": 893, 
+                "top": 281
+            }, 
+            "post_job_actions": {
+                "HideDatasetActionoutput_neg": {
+                    "action_arguments": {}, 
+                    "action_type": "HideDatasetAction", 
+                    "output_name": "output_neg"
+                }, 
+                "RenameDatasetActionoutput_pos": {
+                    "action_arguments": {
+                        "newname": "Secreted proteins"
+                    }, 
+                    "action_type": "RenameDatasetAction", 
+                    "output_name": "output_pos"
+                }
+            }, 
+            "tool_errors": null, 
+            "tool_id": "seq_filter_by_id", 
+            "tool_state": "{\"__page__\": 0, \"output_choice_cond\": \"{\\\"output_choice\\\": \\\"pos\\\", \\\"__current_case__\\\": 1}\", \"input_file\": \"null\", \"input_tabular\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"columns\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\"}", 
+            "tool_version": "0.0.1", 
+            "type": "tool", 
+            "user_outputs": []
+        }
+    }
+}
\ No newline at end of file