changeset 0:f86c65c60056 draft

planemo upload for repository https://github.com/pvanheus/data_manager_shapeit_reference commit c9acc04bcb85873e6eb78b4163d1a7daa2679f22
author sanbi-uwc
date Tue, 11 Sep 2018 10:39:38 -0400
parents
children 52f0767fe68d
files LICENSE data_manager/shapeit_ref.py data_manager/shapeit_ref.xml data_manager_conf.xml tool-data/shapeit_ref.loc.sample tool_data_tables_conf.xml.sample
diffstat 6 files changed, 125 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Tue Sep 11 10:39:38 2018 -0400
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 pvanheus
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/shapeit_ref.py	Tue Sep 11 10:39:38 2018 -0400
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+
+from __future__ import division, print_function
+import argparse
+import json
+import os
+import os.path
+
+
+def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry):
+    data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {})
+    data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, [])
+    data_manager_dict['data_tables'][data_table_name].append(data_table_entry)
+    return data_manager_dict
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Generate a data manager output for SHAPEIT reference data')
+    parser.add_argument('key', help='Short key to identify this reference set (no spaces)')
+    parser.add_argument('description', help='Description of reference set')
+    parser.add_argument('path', help='Filesystem path to directory containing this reference set')
+    parser.add_argument('prefix', help='Filename prefix for files in this reference set')
+    parser.add_argument('output_file', type=argparse.FileType('w'), help='JSON file used to write data manager values to')
+    args = parser.parse_args()
+
+    if not os.path.exists(args.path):
+        exit("Unable to find specified path {}".format(args.path))
+
+    prefix_exists = False
+    for filename in os.listdir(args.path):
+        if filename.startswith(args.prefix):
+            prefix_exists = True
+            break
+    else:
+        if not prefix_exists:
+            exit("Unable to find a file with prefix {} in {}".format(args.prefix, args.path))
+
+    for column in ('key', 'description', 'path', 'prefix'):
+        value = getattr(args, column)
+        if '\t' in value:
+            exit("TAB character found in {} argument".format(column))
+
+    data_manager_dict = {}
+    data_table_entry = dict(key=args.key, description=args.description, path=args.path, prefix=args.prefix)
+    _add_data_table_entry(data_manager_dict, 'shapeit_ref', data_table_entry)
+
+    args.output_file.write(json.dumps(data_manager_dict) + '\n')
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/shapeit_ref.xml	Tue Sep 11 10:39:38 2018 -0400
@@ -0,0 +1,30 @@
+<tool id="shapeit_reference_manager" name="SHAPEIT reference data path manager" tool_type="manage_data" version="0.0.1">
+    <command detect_errors="aggressive"><![CDATA[
+    python $__tool_directory__/shapeit_ref.py
+        '${key}'
+        '${description}'
+        '${path}'
+        '${prefix}'
+        '${output_file}'
+    ]]></command>
+    <inputs>
+        <param name="key" type="text" label="Short key to identify this reference set (no spaces)" />
+        <param name="description" type="text" label="Description of the reference data" />
+        <param name="path" type="text" label="Filesystem path to directory containing this reference set" />
+        <param name="prefix" type="text" label="Filename prefix for files in this reference set" />
+    </inputs>
+    <outputs>
+        <data name="output_file" format="data_manager_json" />
+    </outputs>
+    <help><![CDATA[
+    This data manager updates the *shapeit_ref* table with information about locally-maintained reference data
+    used by the SHAPEIT_ haplotype estimation (i.e. phasing) package. An example for the 1000 Genomes
+    dataset is available here_.
+
+    .. _SHAPEIT: http://mathgen.stats.ox.ac.uk/genetics_software/shapeit/shapeit.html
+    .. _here: https://mathgen.stats.ox.ac.uk/impute/1000GP_Phase3.html
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1038/nmeth.1785</citation>
+    </citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml	Tue Sep 11 10:39:38 2018 -0400
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<data_managers>    
+    <data_manager tool_file="data_manager/shapeit_ref.xml" id="data_manager_shapeit_reference">
+        <data_table name="shapeit_ref">
+            <output>
+                <column name="key" />
+                <column name="description" />
+                <column name="path" />
+                <column name="prefix" />
+            </output>
+        </data_table>
+    </data_manager>    
+</data_managers>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/shapeit_ref.loc.sample	Tue Sep 11 10:39:38 2018 -0400
@@ -0,0 +1,7 @@
+#This file lists the locations of SHAPEIT reference data collections
+#
+#<unique_key>	<description>	<path>	<file_prefix>
+#
+#For example
+#
+#1000G_Phase3	1000 Genomes project haplotypes	/path/to/shapeit_ref/1000G	1000GP_Phase3_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_tables_conf.xml.sample	Tue Sep 11 10:39:38 2018 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of SHAPEIT reference data sets (directory path and filename prefix) -->
+    <table name="shapeit_ref" comment_char="#">
+        <columns>key, description, path, prefix</columns>
+        <file path="tool-data/shapeit_ref.loc" />
+    </table>
+</tables>
\ No newline at end of file