Mercurial > repos > sanbi-uwc > data_manager_shapeit_reference
changeset 0:f86c65c60056 draft
planemo upload for repository https://github.com/pvanheus/data_manager_shapeit_reference commit c9acc04bcb85873e6eb78b4163d1a7daa2679f22
author | sanbi-uwc |
---|---|
date | Tue, 11 Sep 2018 10:39:38 -0400 |
parents | |
children | 52f0767fe68d |
files | LICENSE data_manager/shapeit_ref.py data_manager/shapeit_ref.xml data_manager_conf.xml tool-data/shapeit_ref.loc.sample tool_data_tables_conf.xml.sample |
diffstat | 6 files changed, 125 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Tue Sep 11 10:39:38 2018 -0400 @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2018 pvanheus + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/shapeit_ref.py Tue Sep 11 10:39:38 2018 -0400 @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 + +from __future__ import division, print_function +import argparse +import json +import os +import os.path + + +def _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry): + data_manager_dict['data_tables'] = data_manager_dict.get('data_tables', {}) + data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get(data_table_name, []) + data_manager_dict['data_tables'][data_table_name].append(data_table_entry) + return data_manager_dict + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Generate a data manager output for SHAPEIT reference data') + parser.add_argument('key', help='Short key to identify this reference set (no spaces)') + parser.add_argument('description', help='Description of reference set') + parser.add_argument('path', help='Filesystem path to directory containing this reference set') + parser.add_argument('prefix', help='Filename prefix for files in this reference set') + parser.add_argument('output_file', type=argparse.FileType('w'), help='JSON file used to write data manager values to') + args = parser.parse_args() + + if not os.path.exists(args.path): + exit("Unable to find specified path {}".format(args.path)) + + prefix_exists = False + for filename in os.listdir(args.path): + if filename.startswith(args.prefix): + prefix_exists = True + break + else: + if not prefix_exists: + exit("Unable to find a file with prefix {} in {}".format(args.prefix, args.path)) + + for column in ('key', 'description', 'path', 'prefix'): + value = getattr(args, column) + if '\t' in value: + exit("TAB character found in {} argument".format(column)) + + data_manager_dict = {} + data_table_entry = dict(key=args.key, description=args.description, path=args.path, prefix=args.prefix) + _add_data_table_entry(data_manager_dict, 'shapeit_ref', 
data_table_entry) + + args.output_file.write(json.dumps(data_manager_dict) + '\n')
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager/shapeit_ref.xml Tue Sep 11 10:39:38 2018 -0400 @@ -0,0 +1,30 @@ +<tool id="shapeit_reference_manager" name="SHAPEIT reference data path manager" tool_type="manage_data" version="0.0.1"> + <command detect_errors="aggressive"><![CDATA[ + python $__tool_directory__/shapeit_ref.py + '${key}' + '${description}' + '${path}' + '${prefix}' + '${output_file}' + ]]></command> + <inputs> + <param name="key" type="text" label="Short key to identify this reference set (no spaces)" /> + <param name="description" type="text" label="Description of the reference data" /> + <param name="path" type="text" label="Filesystem path to directory containing this reference set" /> + <param name="prefix" type="text" label="Filename prefix for files in this reference set" /> + </inputs> + <outputs> + <data name="output_file" format="data_manager_json" /> + </outputs> + <help><![CDATA[ + This data manager updates the *shapeit_ref* table with information about locally-maintained reference data + used by the SHAPEIT_ haplotype estimation (i.e. phasing) package. An example for the 1000 Genomes + dataset is available here_. + + .. _SHAPEIT: http://mathgen.stats.ox.ac.uk/genetics_software/shapeit/shapeit.html + .. _here: https://mathgen.stats.ox.ac.uk/impute/1000GP_Phase3.html + ]]></help> + <citations> + <citation type="doi">10.1038/nmeth.1785</citation> + </citations> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/data_manager_conf.xml Tue Sep 11 10:39:38 2018 -0400 @@ -0,0 +1,13 @@ +<?xml version="1.0"?> +<data_managers> + <data_manager tool_file="data_manager/shapeit_ref.xml" id="data_manager_shapeit_reference"> + <data_table name="shapeit_ref"> + <output> + <column name="key" /> + <column name="description" /> + <column name="path" /> + <column name="prefix" /> + </output> + </data_table> + </data_manager> +</data_managers>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/shapeit_ref.loc.sample Tue Sep 11 10:39:38 2018 -0400 @@ -0,0 +1,7 @@ +#This file lists the locations of SHAPEIT reference data collections (columns are TAB-separated) +# +#<unique_key> <description> <path> <file_prefix> +# +#For example +# +#1000G_Phase3 1000 Genomes project haplotypes /path/to/shapeit_ref/1000G 1000GP_Phase3_
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_tables_conf.xml.sample Tue Sep 11 10:39:38 2018 -0400 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of SHAPEIT reference data sets --> + <table name="shapeit_ref" comment_char="#"> + <columns>key, description, path, prefix</columns> + <file path="tool-data/shapeit_ref.loc" /> + </table> +</tables> \ No newline at end of file