changeset 0:4f7004475124 draft default tip

planemo upload for repository https://github.com/pvanheus/data_manager_shapeit_reference commit aa91508cbd163b7b005d87824ffa997e62d5add6
author sanbi-uwc
date Tue, 18 Sep 2018 00:54:49 -0400
parents
children
files LICENSE bioblend/.ipynb_checkpoints/Untitled-checkpoint.ipynb bioblend/.nbgrader.log bioblend/Untitled.ipynb data_manager/shapeit_ref.py data_manager/shapeit_ref.xml data_manager_conf.xml test-data/sample.out tool-data/shapeit2_ref.loc.sample tool_data_table_conf.xml.sample
diffstat 10 files changed, 374 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Tue Sep 18 00:54:49 2018 -0400
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 pvanheus
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bioblend/.ipynb_checkpoints/Untitled-checkpoint.ipynb	Tue Sep 18 00:54:49 2018 -0400
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bioblend/.nbgrader.log	Tue Sep 18 00:54:49 2018 -0400
@@ -0,0 +1,26 @@
+[NotebookApp | WARNING] No nbgrader_config.py file found (rerun with --debug to see where nbgrader is looking)
+[NotebookApp | INFO] Loading the validate_assignment nbgrader serverextension
+[NotebookApp | INFO] ✓ nbpresent HTML export ENABLED
+[NotebookApp | WARNING] ✗ nbpresent PDF export DISABLED: No module named 'nbbrowserpdf'
+[NotebookApp | INFO] Serving notebooks from local directory: /home/pvh/Documents/code/SANBI/CPGR/data_managers/data_manager_shapeit_ref/bioblend
+[NotebookApp | INFO] 0 active kernels
+[NotebookApp | INFO] The Jupyter Notebook is running at:
+[NotebookApp | INFO] http://localhost:8888/?token=61e63c3509f8ac0317fda9361f366d62624139ff97fe34b1
+[NotebookApp | INFO] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).
+[NotebookApp | CRITICAL] 
+    
+    Copy/paste this URL into your browser when you connect for the first time,
+    to login with a token:
+        http://localhost:8888/?token=61e63c3509f8ac0317fda9361f366d62624139ff97fe34b1
+[NotebookApp | INFO] Accepting one-time-token-authenticated connection from 127.0.0.1
+[NotebookApp | INFO] Creating new notebook in 
+[NotebookApp | INFO] Kernel started: 80389481-149c-4b52-9ae8-fa1b81e1ca1b
+[NotebookApp | INFO] Adapting to protocol v5.1 for kernel 80389481-149c-4b52-9ae8-fa1b81e1ca1b
+[NotebookApp | INFO] Saving file at /Untitled.ipynb
+[NotebookApp | INFO] Saving file at /Untitled.ipynb
+[NotebookApp | INFO] Saving file at /Untitled.ipynb
+[NotebookApp | INFO] Saving file at /Untitled.ipynb
+[NotebookApp | INFO] Saving file at /Untitled.ipynb
+[NotebookApp | INFO] Saving file at /Untitled.ipynb
+[NotebookApp | INFO] Saving file at /Untitled.ipynb
+[NotebookApp | INFO] Starting buffering for 80389481-149c-4b52-9ae8-fa1b81e1ca1b:8df5e156c9b94abc8d2818315546f1cf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bioblend/Untitled.ipynb	Tue Sep 18 00:54:49 2018 -0400
@@ -0,0 +1,196 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from bioblend import toolshed"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ts = toolshed.ToolShedInstance(url='https://testtoolshed.g2.bx.psu.edu', key='49ad6deed569af1c011647e3911b9ea7')\n",
+    "all_repos = ts.repositories.get_repositories()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'category_ids': ['83f763caf629aa54'],\n",
+       "  'deleted': False,\n",
+       "  'deprecated': False,\n",
+       "  'description': 'Add path for SHAPEIT reference data to shapeit_ref table',\n",
+       "  'homepage_url': '',\n",
+       "  'id': '59c15cd8db07b838',\n",
+       "  'model_class': 'Repository',\n",
+       "  'name': 'data_manager_shapeit_reference',\n",
+       "  'owner': 'sanbi-uwc',\n",
+       "  'private': False,\n",
+       "  'remote_repository_url': 'https://github.com/pvanheus/data_manager_shapeit_reference',\n",
+       "  'times_downloaded': 22,\n",
+       "  'type': 'unrestricted',\n",
+       "  'user_id': '25a703560f0040ae'}]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[ repo for repo in all_repos if repo['owner'] == 'sanbi-uwc' and repo['name'] == 'data_manager_shapeit_reference' ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_revisions = ts.repositories.repository_revisions() "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[{'changeset_revision': '6b54dcb42cd5',\n",
+       "  'downloadable': False,\n",
+       "  'has_repository_dependencies': False,\n",
+       "  'id': 'c85f89a2a97b5743',\n",
+       "  'includes_datatypes': False,\n",
+       "  'includes_tool_dependencies': False,\n",
+       "  'includes_tools': False,\n",
+       "  'includes_tools_for_display_in_tool_panel': False,\n",
+       "  'includes_workflows': False,\n",
+       "  'malicious': False,\n",
+       "  'missing_test_components': False,\n",
+       "  'model_class': 'RepositoryMetadata',\n",
+       "  'repository_id': '59c15cd8db07b838',\n",
+       "  'url': '/api/repository_revisions/c85f89a2a97b5743'},\n",
+       " {'changeset_revision': 'f0c7cab2f547',\n",
+       "  'downloadable': False,\n",
+       "  'has_repository_dependencies': False,\n",
+       "  'id': '35de89a3905a4173',\n",
+       "  'includes_datatypes': False,\n",
+       "  'includes_tool_dependencies': False,\n",
+       "  'includes_tools': False,\n",
+       "  'includes_tools_for_display_in_tool_panel': False,\n",
+       "  'includes_workflows': False,\n",
+       "  'malicious': False,\n",
+       "  'missing_test_components': False,\n",
+       "  'model_class': 'RepositoryMetadata',\n",
+       "  'repository_id': '59c15cd8db07b838',\n",
+       "  'url': '/api/repository_revisions/35de89a3905a4173'},\n",
+       " {'changeset_revision': '52f0767fe68d',\n",
+       "  'downloadable': True,\n",
+       "  'has_repository_dependencies': False,\n",
+       "  'id': '778b955f13aeb7f7',\n",
+       "  'includes_datatypes': False,\n",
+       "  'includes_tool_dependencies': False,\n",
+       "  'includes_tools': True,\n",
+       "  'includes_tools_for_display_in_tool_panel': False,\n",
+       "  'includes_workflows': False,\n",
+       "  'malicious': False,\n",
+       "  'missing_test_components': False,\n",
+       "  'model_class': 'RepositoryMetadata',\n",
+       "  'repository_id': '59c15cd8db07b838',\n",
+       "  'url': '/api/repository_revisions/778b955f13aeb7f7'},\n",
+       " {'changeset_revision': 'f86c65c60056',\n",
+       "  'downloadable': True,\n",
+       "  'has_repository_dependencies': False,\n",
+       "  'id': '32de50d3c34e66d3',\n",
+       "  'includes_datatypes': False,\n",
+       "  'includes_tool_dependencies': False,\n",
+       "  'includes_tools': True,\n",
+       "  'includes_tools_for_display_in_tool_panel': False,\n",
+       "  'includes_workflows': False,\n",
+       "  'malicious': False,\n",
+       "  'missing_test_components': False,\n",
+       "  'model_class': 'RepositoryMetadata',\n",
+       "  'repository_id': '59c15cd8db07b838',\n",
+       "  'url': '/api/repository_revisions/32de50d3c34e66d3'},\n",
+       " {'changeset_revision': '495439e68fca',\n",
+       "  'downloadable': True,\n",
+       "  'has_repository_dependencies': False,\n",
+       "  'id': '6ac625afb8ce0ebd',\n",
+       "  'includes_datatypes': False,\n",
+       "  'includes_tool_dependencies': False,\n",
+       "  'includes_tools': True,\n",
+       "  'includes_tools_for_display_in_tool_panel': False,\n",
+       "  'includes_workflows': False,\n",
+       "  'malicious': False,\n",
+       "  'missing_test_components': False,\n",
+       "  'model_class': 'RepositoryMetadata',\n",
+       "  'repository_id': '59c15cd8db07b838',\n",
+       "  'url': '/api/repository_revisions/6ac625afb8ce0ebd'},\n",
+       " {'changeset_revision': '203133a92000',\n",
+       "  'downloadable': True,\n",
+       "  'has_repository_dependencies': False,\n",
+       "  'id': 'eed087cca5ea50c4',\n",
+       "  'includes_datatypes': False,\n",
+       "  'includes_tool_dependencies': False,\n",
+       "  'includes_tools': True,\n",
+       "  'includes_tools_for_display_in_tool_panel': False,\n",
+       "  'includes_workflows': False,\n",
+       "  'malicious': False,\n",
+       "  'missing_test_components': False,\n",
+       "  'model_class': 'RepositoryMetadata',\n",
+       "  'repository_id': '59c15cd8db07b838',\n",
+       "  'url': '/api/repository_revisions/eed087cca5ea50c4'}]"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "[ revision for revision in all_revisions if revision['repository_id'] == '59c15cd8db07b838' ]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [Root]",
+   "language": "python",
+   "name": "Python [Root]"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/shapeit_ref.py	Tue Sep 18 00:54:49 2018 -0400
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+
+from __future__ import division, print_function
+import argparse
+import json
+import os
+import os.path
+
+
+def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
+    """Append data_table_entry to data_manager_dict['data_tables'][data_table_name] (levels created on demand) and return the dict."""
+    tables = data_manager_dict.setdefault('data_tables', {})
+    tables.setdefault(data_table_name, []).append(data_table_entry)
+    return data_manager_dict
+
+
+_add_data_table_entry.__annotations__ = {'data_manager_dict': dict, 'data_table_name': str, 'data_table_entry': dict, 'return': dict}
+
+
+def assert_prefix_exists(prefix, path, prefix_type):
+    """Exit with an error message unless an entry of directory ``path`` has a name starting with ``prefix``.
+
+    ``prefix_type`` only labels the prefix in the error message.
+    Returns None when a matching entry exists; os.listdir() errors propagate unchanged.
+    """
+    # SystemExit is raised directly because the exit() helper is added by the site module and may be absent.
+    if not any(filename.startswith(prefix) for filename in os.listdir(path)):
+        raise SystemExit("Unable to find a file with {} prefix {} in {}".format(prefix_type, prefix, path))
+
+
+assert_prefix_exists.__annotations__ = {'prefix': str, 'path': str, 'prefix_type': str, 'return': None}
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Generate a data manager output for SHAPEIT reference data')
+    parser.add_argument('key', help='Short key to identify this reference set (no spaces)')
+    parser.add_argument('name', help='Description of reference set')
+    parser.add_argument('path', help='Filesystem path to directory containing this reference set')
+    parser.add_argument('reference_prefix', help='Filename prefix for the reference (.hap / .legend / .sample) files')
+    parser.add_argument('map_prefix', help='Filename prefix for map files in this reference set')
+    parser.add_argument('sample_prefix', help='Filename prefix for sample file in this reference set')
+    parser.add_argument('output_file', type=argparse.FileType('w'), help='JSON file used to write data manager values to')
+    args = parser.parse_args()
+
+    # Require a directory: a plain existence check lets a regular file through and os.listdir() would then raise.
+    if not os.path.isdir(args.path):
+        raise SystemExit("Unable to find specified path {}".format(args.path))
+
+    assert_prefix_exists(args.reference_prefix, args.path, 'reference')
+    assert_prefix_exists(args.map_prefix, args.path, 'map')
+    assert_prefix_exists(args.sample_prefix, args.path, 'sample')
+
+    # Reject TABs in any value that becomes a data table column.
+    for column in ('key', 'name', 'path', 'reference_prefix', 'map_prefix', 'sample_prefix'):
+        if '\t' in getattr(args, column):
+            raise SystemExit("TAB character found in {} argument".format(column))
+
+    data_table_entry = dict(value=args.key, name=args.name, path=args.path,
+                            reference_prefix=args.reference_prefix, map_prefix=args.map_prefix,
+                            sample_prefix=args.sample_prefix)
+    data_manager_dict = _add_data_table_entry({}, 'shapeit2_ref', data_table_entry)
+    with args.output_file as output_file:
+        output_file.write(json.dumps(data_manager_dict, sort_keys=True) + '\n')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/shapeit_ref.xml	Tue Sep 18 00:54:49 2018 -0400
@@ -0,0 +1,34 @@
+<tool id="shapeit_reference_manager" name="SHAPEIT reference data path manager" tool_type="manage_data" version="0.0.8">
+    <command detect_errors="exit_code"><![CDATA[
+    python $__tool_directory__/shapeit_ref.py
+        '${value}'
+        '${name}'
+        '${path}'
+        '${reference_prefix}'
+        '${map_prefix}'
+        '${sample_prefix}'
+        '${output_file}'
+    ]]></command>
+    <inputs>
+        <param name="value" type="text" label="Short key to identify this reference set (no spaces)" />
+        <param name="name" type="text" label="Description of the reference data" />
+        <param name="path" type="text" label="Filesystem path to directory containing this reference set" />
+        <param name="reference_prefix" type="text" label="Filename prefix for the reference (.hap / .legend / .sample) files" />
+        <param name="map_prefix" type="text" label="Filename prefix for map files in this reference set" />
+        <param name="sample_prefix" type="text" label="Filename prefix for sample file for this reference set" />
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data_manager_json" />
+    </outputs>
+    <help><![CDATA[
+    This data manager updates the *shapeit2_ref* table with information about locally-maintained reference data
+    used by the SHAPEIT_ haplotype estimation (i.e. phasing) package. An example for the 1000 Genomes
+    dataset is available here_.
+
+    .. _SHAPEIT: http://mathgen.stats.ox.ac.uk/genetics_software/shapeit/shapeit.html
+    .. _here: https://mathgen.stats.ox.ac.uk/impute/1000GP_Phase3.html
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1038/nmeth.1785</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Tue Sep 18 00:54:49 2018 -0400
@@ -0,0 +1,15 @@
+<?xml version="1.0"?>
+<data_managers>    
+    <data_manager tool_file="data_manager/shapeit_ref.xml" id="data_manager_shapeit2_reference">
+        <data_table name="shapeit2_ref">
+            <output>
+                <column name="value" />
+                <column name="name" />
+                <column name="path" />
+                <column name="reference_prefix" />
+                <column name="map_prefix" />
+                <column name="sample_prefix" />
+            </output>
+        </data_table>
+    </data_manager>    
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample.out	Tue Sep 18 00:54:49 2018 -0400
@@ -0,0 +1,1 @@
+{"data_tables": {"shapeit2_ref": [{"name": "1000 Genomes Project (phase 3) haplotypes (http://mathgen.stats.ox.ac.uk/impute/1000GPX20Phase3.html)", "value": "1000GP_Phase3", "map_prefix": "genetic_map_chr", "path": "/tools/databases/1000G/1000GP_Phase3", "reference_prefix": "1000GP_Phase3_chr", "sample_prefix": "1000GP_Phase3"}]}}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/shapeit2_ref.loc.sample	Tue Sep 18 00:54:49 2018 -0400
@@ -0,0 +1,7 @@
+#This file lists the locations of SHAPEIT reference data collections
+#
+#<value>	<name>	<path>	<reference_prefix>	<map_prefix>	<sample_prefix>
+#
+#For example
+#
+#1000G_Phase3	1000 Genomes project haplotypes	/path/to/shapeit_ref/1000G	1000GP_Phase3_chr	genetic_map_chr	1000GP_Phase3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Tue Sep 18 00:54:49 2018 -0400
@@ -0,0 +1,6 @@
+<tables>
+    <table name="shapeit2_ref" comment_char="#">
+        <columns>value, name, path, reference_prefix, map_prefix, sample_prefix</columns>
+        <file path="tool-data/shapeit2_ref.loc" />
+    </table>
+</tables>