Mercurial > repos > bgruening > eden_rna_cross_validation_workflow
changeset 0:dd85dc28075f draft
Uploaded
author | bgruening |
---|---|
date | Thu, 15 May 2014 12:17:52 -0400 |
parents | |
children | aa4c5b2ec3fd |
files | EDeN_RNA_Cross_Validation.ga readme.rst repository_dependencies.xml |
diffstat | 3 files changed, 609 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/EDeN_RNA_Cross_Validation.ga Thu May 15 12:17:52 2014 -0400 @@ -0,0 +1,564 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "", + "format-version": "0.1", + "name": "EDeN RNA Cross Validation", + "steps": { + "0": { + "annotation": "Sequence data (for example piRNA)", + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Sequence data (for example piRNA)", + "name": "FASTA file" + } + ], + "name": "Input dataset", + "outputs": [], + "position": { + "left": 192, + "top": 262 + }, + "tool_errors": null, + "tool_id": null, + "tool_state": "{\"name\": \"FASTA file\"}", + "tool_version": null, + "type": "data_input", + "user_outputs": [] + }, + "1": { + "annotation": "", + "id": 1, + "input_connections": { + "input": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "name": "FASTA-to-Tabular", + "outputs": [ + { + "name": "output", + "type": "tabular" + } + ], + "position": { + "left": 399, + "top": 200 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "fasta2tab", + "tool_state": "{\"__page__\": 0, \"keep_first\": \"\\\"0\\\"\", \"descr_columns\": \"\\\"1\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}", + "tool_version": "1.1.0", + "type": "tool", + "user_outputs": [] + }, + "2": { + "annotation": "", + "id": 2, + "input_connections": { + "input": { + "id": 1, + "output_name": "output" + } + }, + "inputs": [], + "name": "Unique", + "outputs": [ + { + "name": "outfile", + "type": "input" + } + ], + "position": { + "left": 445, + "top": 324 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/bgruening/unique/bg_uniq/0.3", + "tool_state": "{\"__page__\": 0, \"ignore_case\": \"\\\"False\\\"\", \"adv_opts\": \"{\\\"column_end\\\": {\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}, \\\"column_start\\\": {\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}, \\\"adv_opts_selector\\\": \\\"advanced\\\", \\\"__current_case__\\\": 1}\", \"__rerun_remap_job_id__\": null, \"is_numeric\": \"\\\"False\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "0.3", + "type": "tool", + "user_outputs": [] + }, + "3": { + "annotation": "", + "id": 3, + "input_connections": { + "input": { + "id": 2, + "output_name": "outfile" + } + }, + "inputs": [], + "name": "Tabular-to-FASTA", + "outputs": [ + { + "name": "output", + "type": "fasta" + } + ], + "position": { + "left": 661, + "top": 336 + }, + "post_job_actions": { + "RenameDatasetActionoutput": { + "action_arguments": { + "newname": "Positive Set" + }, + "action_type": "RenameDatasetAction", + "output_name": "output" + } + }, + "tool_errors": null, + "tool_id": "tab2fasta", + "tool_state": "{\"title_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": [\\\"1\\\"]}\", \"__page__\": 0, \"seq_col\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"__rerun_remap_job_id__\": null, \"input\": \"null\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "1.1.0", + "type": "tool", + "user_outputs": [] + }, + "4": { + "annotation": "", + "id": 4, + "input_connections": { + "token_set_0|repeat_select|infile": { + "id": 2, + "output_name": "outfile" + }, + "token_set_1|repeat_select|infile": { + "id": 2, + "output_name": "outfile" + } + }, + "inputs": [], + "name": "Create text file", + "outputs": [ + { + "name": "outfile", + "type": "txt" + } + ], + "position": { + "left": 584.25, + "top": 728.75 + }, + "post_job_actions": { + "RenameDatasetActionoutfile": { + "action_arguments": { + "newname": "target file" + }, + "action_type": "RenameDatasetAction", + "output_name": "outfile" + } + }, + "tool_errors": null, + "tool_id": "testtoolshed.g2.bx.psu.edu/repos/bgruening/recurring_lines/bg_text_file_with_recurring_lines/0.1", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"token_set\": \"[{\\\"__index__\\\": 0, \\\"line\\\": \\\"1\\\", \\\"repeat_select\\\": {\\\"repeat_select_opts\\\": \\\"file\\\", \\\"infile\\\": null, \\\"__current_case__\\\": 1}}, {\\\"__index__\\\": 1, \\\"line\\\": \\\"-1\\\", \\\"repeat_select\\\": {\\\"repeat_select_opts\\\": \\\"file\\\", \\\"infile\\\": null, \\\"__current_case__\\\": 1}}]\"}", + "tool_version": "0.1", + "type": "tool", + "user_outputs": [] + }, + "5": { + "annotation": "", + "id": 5, + "input_connections": { + "input1": { + "id": 3, + "output_name": "output" + } + }, + "inputs": [], + "name": "shuffleseq", + "outputs": [ + { + "name": "out_file1", + "type": "fasta" + } + ], + "position": { + "left": 675, + "top": 207 + }, + "post_job_actions": { + "RenameDatasetActionout_file1": { + "action_arguments": { + "newname": "Negative Set" + }, + "action_type": "RenameDatasetAction", + "output_name": "out_file1" + } + }, + "tool_errors": null, + "tool_id": "toolshed.g2.bx.psu.edu/repos/devteam/emboss_5/EMBOSS: shuffleseq87/5.0.0", + "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"out_format1\": \"\\\"fasta\\\"\", \"__rerun_remap_job_id__\": null, \"shuffle\": \"\\\"1\\\"\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "5.0.0", + "type": "tool", + "user_outputs": [] + }, + "6": { + "annotation": "", + "id": 6, + "input_connections": { + "input1": { + "id": 3, + "output_name": "output" + }, + "queries_0|input2": { + "id": 5, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Concatenate datasets", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 993, + "top": 246 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "cat1", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input1\": \"null\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\", \"queries\": \"[{\\\"input2\\\": null, \\\"__index__\\\": 0}]\"}", + "tool_version": "1.0.0", + "type": "tool", + "user_outputs": [] + }, + "7": { + "annotation": "", + "id": 7, + "input_connections": { + "input": { + "id": 6, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "FASTA-to-Tabular", + "outputs": [ + { + "name": "output", + "type": "tabular" + } + ], + "position": { + "left": 259, + "top": 506 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "fasta2tab", + "tool_state": "{\"__page__\": 0, \"keep_first\": \"\\\"0\\\"\", \"descr_columns\": \"\\\"1\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\", \"__rerun_remap_job_id__\": null}", + "tool_version": "1.1.0", + "type": "tool", + "user_outputs": [] + }, + "8": { + "annotation": "", + "id": 8, + "input_connections": { + "input": { + "id": 7, + "output_name": "output" + } + }, + "inputs": [], + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 455, + "top": 524 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "Cut1", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"delimiter\": \"\\\"T\\\"\", \"columnList\": \"\\\"c2\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "1.0.2", + "type": "tool", + "user_outputs": [] + }, + "9": { + "annotation": "", + "id": 9, + "input_connections": { + "input": { + "id": 8, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Add column", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 631, + "top": 463 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "addValue", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"exp\": \"\\\"b\\\"\", \"iterate\": \"\\\"no\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "1.0.0", + "type": "tool", + "user_outputs": [] + }, + "10": { + "annotation": "", + "id": 10, + "input_connections": { + "input": { + "id": 9, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Add column", + "outputs": [ + { + "name": "out_file1", + "type": "input" + } + ], + "position": { + "left": 636, + "top": 567 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "addValue", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"exp\": \"\\\"e\\\"\", \"iterate\": \"\\\"no\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "1.0.0", + "type": "tool", + "user_outputs": [] + }, + "11": { + "annotation": "", + "id": 11, + "input_connections": { + "input1": { + "id": 10, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Merge Columns", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 802, + "top": 478 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "mergeCols1", + "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"__rerun_remap_job_id__\": null, \"col2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"1\\\"}\", \"col1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"2\\\"}\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\", \"columns\": \"[]\"}", + "tool_version": "1.0.1", + "type": "tool", + "user_outputs": [] + }, + "12": { + "annotation": "", + "id": 12, + "input_connections": { + "input1": { + "id": 11, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Merge Columns", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 804, + "top": 585 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "mergeCols1", + "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"__rerun_remap_job_id__\": null, \"col2\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"3\\\"}\", \"col1\": \"{\\\"__class__\\\": \\\"UnvalidatedValue\\\", \\\"value\\\": \\\"4\\\"}\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\", \"columns\": \"[]\"}", + "tool_version": "1.0.1", + "type": "tool", + "user_outputs": [] + }, + "13": { + "annotation": "", + "id": 13, + "input_connections": { + "input": { + "id": 12, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 1017, + "top": 453 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "Cut1", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"delimiter\": \"\\\"T\\\"\", \"columnList\": \"\\\"c5\\\"\", \"input\": \"null\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\"}", + "tool_version": "1.0.2", + "type": "tool", + "user_outputs": [] + }, + "14": { + "annotation": "", + "id": 14, + "input_connections": { + "infile": { + "id": 13, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "EDeN Converters", + "outputs": [ + { + "name": "outfile", + "type": "sparsevector" + } + ], + "position": { + "left": 1204, + "top": 453 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "bg_eden_feature", + "tool_state": "{\"smooth_opts\": \"{\\\"smooth_opts_selector\\\": \\\"non_smooth\\\", \\\"__current_case__\\\": 0}\", \"distance\": \"\\\"5\\\"\", \"__page__\": 0, \"hash_bit_size\": \"\\\"15\\\"\", \"__rerun_remap_job_id__\": null, \"no_normalization\": \"\\\"False\\\"\", \"radius\": \"\\\"2\\\"\", \"vertex_degree_threshold\": \"\\\"7\\\"\", \"file_type_opts\": \"{\\\"file_type_opts_selector\\\": \\\"STRINGSEQ\\\", \\\"__current_case__\\\": 3}\", \"graph_type\": \"\\\"DIRECTED\\\"\", \"kernel_type_opts\": \"{\\\"kernel_type_opts_selector\\\": \\\"STRING\\\", \\\"__current_case__\\\": 4}\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\", \"infile\": \"null\", \"min_kernel\": \"\\\"False\\\"\"}", + "tool_version": "0.1", + "type": "tool", + "user_outputs": [] + }, + "15": { + "annotation": "", + "id": 15, + "input_connections": { + "sparse_vector_infile": { + "id": 14, + "output_name": "outfile" + }, + "target_infile": { + "id": 4, + "output_name": "outfile" + } + }, + "inputs": [], + "name": "EDeN Crossvalidation", + "outputs": [ + { + "name": "outfile", + "type": "tabular" + } + ], + "position": { + "left": 784, + "top": 734 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "bg_eden_cross_validation", + "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"sparse_vector_infile\": \"null\", \"target_infile\": \"null\", \"chromInfo\": \"\\\"/home/gruening/projects/code/galaxy-central/tool-data/shared/ucsc/chrom/?.len\\\"\", \"num_cross_validation_folds\": \"\\\"10\\\"\"}", + "tool_version": "0.1", + "type": "tool", + "user_outputs": [] + }, + "16": { + "annotation": "", + "id": 16, + "input_connections": { + "input": { + "id": 15, + "output_name": "outfile" + } + }, + "inputs": [], + "name": "Cut", + "outputs": [ + { + "name": "out_file1", + "type": "tabular" + } + ], + "position": { + "left": 986.25, + "top": 723.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "Cut1", + "tool_state": "{\"columnList\": \"\\\"c2,c4\\\"\", \"input\": \"null\", \"delimiter\": \"\\\"T\\\"\", \"__rerun_remap_job_id__\": null, \"__page__\": 0}", + "tool_version": "1.0.2", + "type": "tool", + "user_outputs": [] + }, + "17": { + "annotation": "", + "id": 17, + "input_connections": { + "infile": { + "id": 16, + "output_name": "out_file1" + } + }, + "inputs": [], + "name": "Performance metrics", + "outputs": [ + { + "name": "outfile", + "type": "tabular" + }, + { + "name": "outfile_plotting", + "type": "tabular" + } + ], + "position": { + "left": 1173.75, + "top": 702.75 + }, + "post_job_actions": {}, + "tool_errors": null, + "tool_id": "testtoolshed.g2.bx.psu.edu/repos/bgruening/perf/stats_perf_tool/5.11.0", + "tool_state": "{\"plot\": \"\\\"-plot roc\\\"\", \"performance_measures\": \"[\\\"-ACC\\\", \\\"-ROC\\\", \\\"-APR\\\"]\", \"__page__\": 0, \"__rerun_remap_job_id__\": null, \"threshold\": \"\\\"0.0\\\"\", \"infile\": \"null\"}", + "tool_version": "0.1.0", + "type": "tool", + "user_outputs": [] + } + } +} \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.rst Thu May 15 12:17:52 2014 -0400 @@ -0,0 +1,38 @@ +This package is a Galaxy workflow for RNA sequence cross validation. + +It uses the EDeN suite to train and test on a known set of +RNA sequences. The negative set of RNA sequences is shuffled +on the input dataset maintaining composition with shuffleseq (EMBOSS). + +See http://www.galaxyproject.org for information about the Galaxy Project. + + +Sample Data +=========== + + + +Citation +======== + + + +Availability +============ + +This workflow is available on the main Galaxy Tool Shed: + +http://toolshed.g2.bx.psu.edu/view/bgruening/eden_rna_cross_validation_workflow + +Development is being done on github: + +https://github.com/bgruening/galaxytools/workflows/EDeN/ + + +Dependencies +============ + +These dependencies should be resolved automatically via the Galaxy Tool Shed: + +* http://toolshed.g2.bx.psu.edu/view/bgruening/glimmer3 +* http://toolshed.g2.bx.psu.edu/view/devteam/emboss_5
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Thu May 15 12:17:52 2014 -0400 @@ -0,0 +1,7 @@ +<?xml version="1.0"?> +<repositories description="This workflow requires the EDeN suite, EMBOSS and a few small text manipulation tools."> + <repository changeset_revision="f034a9d33aca" name="recurring_lines" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="e390b5b6b89c" name="perf" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="5be8af51780d" name="eden_toolbox" owner="bgruening" toolshed="http://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="1b9abd7d2445" name="emboss_5" owner="devteam" toolshed="http://testtoolshed.g2.bx.psu.edu" /> +</repositories>