# HG changeset patch # User peterjc # Date 1367853071 14400 # Node ID 3a6de5cb858dc009d5244fa4ba8c10bb221b78a1 Uploaded first attempt at a workflow with dependencies on the Tool Shed diff -r 000000000000 -r 3a6de5cb858d README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Mon May 06 11:11:11 2013 -0400 @@ -0,0 +1,25 @@ +This Tool Shed Repository contains a workflow for the identification of candidate secreted proteins from a given protein FASTA file. + +It runs SignalP v3.0 and selects only proteins with a strong predicted signal peptide, and then runs TMHMM v2.0 on those, and selects only proteins without a predicted trans-membrane helix. This workflow was used in Kikuchi et al (2011), and is a simplification of the candidate effector protocol described in Jones et al (2009). + +Kikuchi T, Cotton JA, Dalzell JJ, Hasegawa K, Kanzaki N, et al. (2011) Genomic insights into the origin of parasitism in the emerging plant pathogen Bursaphelenchus xylophilus. PLoS Pathog 7: e1002219. +http://dx.doi.org/10.1371/journal.ppat.1002219 + +Jones JT, Kumar A, Pylypenko LA, Thirugnanasambandam A, Castelli L, et al. (2009) Identification and functional characterization of effectors in expressed sequence tags from various life cycle stages of the potato cyst nematode Globodera pallida. Mol Plant Pathol 10: 815–28. +http://dx.doi.org/10.1111/j.1364-3703.2009.00585.x + +Bendtsen JD, Nielsen H, von Heijne G, Brunak S (2004) Improved prediction of signal peptides: SignalP 3.0. J Mol Biol 340: 783–95. +http://dx.doi.org/10.1016/j.jmb.2004.05.028 + +Krogh A, Larsson B, von Heijne G, Sonnhammer E (2001) Predicting transmembrane protein topology with a hidden Markov model: application to complete genomes. J Mol Biol 305: 567–580. 
+http://dx.doi.org/10.1006/jmbi.2000.4315 + + +Availability +============ + +This workflow is available on the main Galaxy Tool Shed: +http://toolshed.g2.bx.psu.edu/view/peterjc/secreted_protein_workflow + +Development is being done on github here: +https://github.com/peterjc/picobio/tree/master/galaxy_workflows/secreted_protein_workflow diff -r 000000000000 -r 3a6de5cb858d repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Mon May 06 11:11:11 2013 -0400 @@ -0,0 +1,7 @@ + + + + + + + diff -r 000000000000 -r 3a6de5cb858d secreted_protein_workflow.ga --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/secreted_protein_workflow.ga Mon May 06 11:11:11 2013 -0400 @@ -0,0 +1,288 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "Runs SignalP v3.0 and TMHMM v2.0 to look for secreted proteins.", + "format-version": "0.1", + "name": "Find secreted proteins with TMHMM and SignalP", + "steps": { + "0": { + "annotation": "", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "", + "name": "Input Dataset" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 200, + "top": 200 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"Input Dataset\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": { + "fasta_file": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool SignalP 3.0", + "name": "organism" + } + ], + "name": "SignalP 3.0", + "outputs": [ + { + "name": "tabular_file", + "type": "tabular" + } + ], + "position": { + "left": 240, + "top": 341 + }, + "post_job_actions": { + "HideDatasetActiontabular_file": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "tabular_file" + } + }, + "tool_errors": null, + "tool_id": "signalp3", + "tool_state": "{\"organism\": 
\"{\\\"__class__\\\": \\\"RuntimeValue\\\"}\", \"fasta_file\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"truncate\": \"\\\"60\\\"\", \"__page__\": 0}", + "tool_version": "0.0.8", + "type": "tool", + "user_outputs": [] + }, + "2": { + "annotation": "Select proteins with predicted signal peptide (SignalP NN D-Score or HMM)", + "id": 2, + "input_connections": { + "input": { + "id": 1, + "output_name": "tabular_file" + } + }, + "inputs": [], + "name": "Filter", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 323, + "top": 528 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + }, + "RenameDatasetActionout_file1": { + "action_arguments": { + "newname": "Filtered SignalP results" + }, + "action_type": "RenameDatasetAction", + "output_name": "out_file1" + } + }, + "tool_errors": null, + "tool_id": "Filter1", + "tool_state": "{\"__page__\": 0, \"cond\": \"\\\"c14=='Y' or c15=='S'\\\"\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input\": \"null\"}", + "tool_version": "1.1.0", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "Select those sequences with signal peptides.", + "id": 3, + "input_connections": { + "input_file": { + "id": 0, + "output_name": "output" + }, + "input_tabular": { + "id": 2, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Filter sequences by ID", + "outputs": [ + { + "name": "output_pos", + "type": "fasta" + }, + { + "name": "output_neg", + "type": "fasta" + } + ], + "position": { + "left": 527, + "top": 200 + }, + "post_job_actions": { + "HideDatasetActionoutput_neg": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_neg" + }, + "HideDatasetActionoutput_pos": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + 
"output_name": "output_pos" + } + }, + "tool_errors": null, + "tool_id": "seq_filter_by_id", + "tool_state": "{\"__page__\": 0, \"output_choice_cond\": \"{\\\"output_choice\\\": \\\"pos\\\", \\\"__current_case__\\\": 1}\", \"input_file\": \"null\", \"input_tabular\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"columns\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\"}", + "tool_version": "0.0.1", + "type": "tool", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "fasta_file": { + "id": 3, + "output_name": "output_pos" + } + }, + "inputs": [], + "name": "TMHMM 2.0", + "outputs": [ + { + "name": "tabular_file", + "type": "tabular" + } + ], + "position": { + "left": 643, + "top": 443 + }, + "post_job_actions": { + "HideDatasetActiontabular_file": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "tabular_file" + } + }, + "tool_errors": null, + "tool_id": "tmhmm2", + "tool_state": "{\"__page__\": 0, \"fasta_file\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "0.0.7", + "type": "tool", + "user_outputs": [] + }, + "5": { + "annotation": "Select proteins with no predicted transmembrane helices.", + "id": 5, + "input_connections": { + "input": { + "id": 4, + "output_name": "tabular_file" + } + }, + "inputs": [], + "name": "Filter", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 729, + "top": 566 + }, + "post_job_actions": { + "HideDatasetActionout_file1": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "out_file1" + }, + "RenameDatasetActionout_file1": { + "action_arguments": { + "newname": "Filtered TMHMM results" + }, + "action_type": "RenameDatasetAction", + "output_name": "out_file1" + } + }, + "tool_errors": null, + "tool_id": "Filter1", + "tool_state": "{\"__page__\": 
0, \"cond\": \"\\\"c5== 0\\\"\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"input\": \"null\"}", + "tool_version": "1.1.0", + "type": "tool", + "user_outputs": [] + }, + "6": { + "annotation": "Select those sequences with no transmembrane helices (from those with signal peptides).", + "id": 6, + "input_connections": { + "input_file": { + "id": 3, + "output_name": "output_pos" + }, + "input_tabular": { + "id": 5, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Filter sequences by ID", + "outputs": [ + { + "name": "output_pos", + "type": "fasta" + }, + { + "name": "output_neg", + "type": "fasta" + } + ], + "position": { + "left": 893, + "top": 281 + }, + "post_job_actions": { + "HideDatasetActionoutput_neg": { + "action_arguments": {}, + "action_type": "HideDatasetAction", + "output_name": "output_neg" + }, + "RenameDatasetActionoutput_pos": { + "action_arguments": { + "newname": "Secreted proteins" + }, + "action_type": "RenameDatasetAction", + "output_name": "output_pos" + } + }, + "tool_errors": null, + "tool_id": "seq_filter_by_id", + "tool_state": "{\"__page__\": 0, \"output_choice_cond\": \"{\\\"output_choice\\\": \\\"pos\\\", \\\"__current_case__\\\": 1}\", \"input_file\": \"null\", \"input_tabular\": \"null\", \"chromInfo\": \"\\\"/opt/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\", \"columns\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\"}", + "tool_version": "0.0.1", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file