Mercurial > repos > sanbi-uwc > data_manager_novocraft_index_builder
changeset 18:2b89ba1c0057 draft default tip
planemo upload for repository https://github.com/zipho/data_manager_novocraft_index_builder commit c8c46a5600bb091d701b8cf78f80a50c6b6812f4
author | sanbi-uwc |
---|---|
date | Tue, 21 Jun 2016 10:12:02 -0400 |
parents | db293ee25be5 |
children | |
files | data_manager/novocraft_index_builder.py data_manager/novocraft_index_builder.xml |
diffstat | 2 files changed, 19 insertions(+), 39 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/novocraft_index_builder.py Fri Apr 15 09:03:40 2016 -0400 +++ b/data_manager/novocraft_index_builder.py Tue Jun 21 10:12:02 2016 -0400 @@ -4,7 +4,6 @@ from __future__ import print_function import os import sys -import urllib2 import logging import argparse import shlex @@ -16,13 +15,14 @@ DEFAULT_DATA_TABLE_NAME = "novocraft_index" -def get_dbkey_id_name(params, dbkey_description=None): +def get_dbkey_id_name(params): # TODO: ensure sequence_id is unique and does not already appear in location file sequence_id = params['param_dict']['sequence_id'] sequence_name = params['param_dict']['sequence_name'] - if not sequence_name: - sequence_name = dbkey_description - return sequence_id, sequence_name + sequence_desc = params['param_dict']['sequence_desc'] + if not sequence_desc: + sequence_desc = sequence_name + return sequence_id, sequence_name, sequence_desc def _make_novocraft_index(data_manager_dict, fasta_filename, target_directory, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME): if os.path.exists(target_directory) and not os.path.isdir(target_directory): @@ -50,25 +50,10 @@ data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry ) return data_manager_dict -def download_from_url( data_manager_dict, params, target_directory, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME ): - # TODO: we should automatically do decompression here - urls = filter(bool, map(lambda x: x.strip(), params['param_dict']['reference_source']['user_url'].split('\n'))) - fasta_reader = [urllib2.urlopen(url) for url in urls] - - _make_novocraft_index(data_manager_dict, fasta_reader, target_directory, sequence_id, sequence_name, data_table_name) - -def download_from_history( data_manager_dict, params, target_directory, sequence_id, sequence_name, data_table_name=DEFAULT_DATA_TABLE_NAME ): - # TODO: allow multiple FASTA input files - input_filename = params['param_dict']['reference_source']['input_fasta'] - - _make_novocraft_index(data_manager_dict, input_filename, target_directory, sequence_id, sequence_name, data_table_name ) - -REFERENCE_SOURCE_TO_DOWNLOAD = dict(url=download_from_url, history=download_from_history) - def main(): parser = argparse.ArgumentParser(description="Generate Novo-craft genome index and JSON describing this") parser.add_argument('output_filename') - parser.add_argument('--dbkey_description') + parser.add_argument('--input_filename') parser.add_argument('--data_table_name', default='novocraft_index') args = parser.parse_args() @@ -79,12 +64,11 @@ os.makedirs(target_directory) data_manager_dict = {} - sequence_id, sequence_name = get_dbkey_id_name(params, dbkey_description=args.dbkey_description) + sequence_id, sequence_name, sequence_desc = get_dbkey_id_name(params) - # Fetch the FASTA - REFERENCE_SOURCE_TO_DOWNLOAD[params['param_dict']['reference_source']['reference_source_selector']]\ - (data_manager_dict, params, target_directory, sequence_id, sequence_name, data_table_name=args.data_table_name or DEFAULT_DATA_TABLE_NAME ) + #Make novocraft index + _make_novocraft_index(data_manager_dict, args.input_filename, target_directory, sequence_id, sequence_name, args.data_table_name or DEFAULT_DATA_TABLE_NAME ) open(filename, 'wb').write(dumps( data_manager_dict )) -if __name__ == "__main__": main() +if __name__ == "__main__": main() \ No newline at end of file
--- a/data_manager/novocraft_index_builder.xml Fri Apr 15 09:03:40 2016 -0400 +++ b/data_manager/novocraft_index_builder.xml Tue Jun 21 10:12:02 2016 -0400 @@ -5,25 +5,21 @@ <exit_code range=":-1" /> <exit_code range="1:" /> </stdio> + <requirements> + <requirement type="package" version="1.2">samtools</requirement> + <requirement type="package" version="3.04.01">novocraft</requirement> + </requirements> <command interpreter="python"> - novocraft_index_builder.py "${out_file}" --data_table_name "novocraft_index" + novocraft_index_builder.py "${out_file}" --input_filename "${input_fasta.fields.path}" --data_table_name "novocraft_index" </command> <inputs> <param type="text" name="sequence_name" value="" label="Name of sequence" optional="False" /> <param type="text" name="sequence_desc" value="" label="Description of sequence" /> <param type="text" name="sequence_id" value="" label="ID for sequence" optional="False" /> - <conditional name="reference_source"> - <param name="reference_source_selector" type="select" label="Choose the source for the reference genome"> - <option value="url">URL</option> - <option value="history">History</option> - </param> - <when value="url"> - <param type="text" area="True" name="user_url" value="http://" label="URLs" optional="False" /> - </when> - <when value="history"> - <param name="input_fasta" type="data" format="fasta" label="FASTA File" multiple="False" optional="False" /> - </when> - </conditional> + <param name="input_fasta" type="select" label="Genome to index(FASTA format)"> + <options from_data_table="all_fasta"/> + </param> + </inputs> <outputs> <data name="out_file" format="data_manager_json" />