#!/usr/bin/env python
# Z. Mashologu (SANBI-UWC)
"""Galaxy data manager script that fetches a reference FASTA for NovoAlign.

Provenance (recovered from the Mercurial changeset this module was embedded in):

    # HG changeset patch
    # User sanbi-uwc
    # Date 1457002781 18000
    # Node ID 85fbd52dbb3619022dc6b797ef2b3b2ed58b1445
    planemo upload for repository
    https://github.com/zipho/data_manager_novoalign_index_builder
    commit d51fdc6291de173e829a839e98c6c3ae367d84bf

Other files added by the same changeset (their XML markup was not preserved,
only the text content listed here):

    README.md
        # Data Manager NovoAlign Index Builder
        Data Manager to build Novo-Align index
    data_manager/novoalign_index_builder.xml
        description: Build an index for use by the Novo Align mapping tool
        requirement: novoalign
        command: novoalign_index_builder.py "${out_file}"
                 --data_table_name "novo_index"
        help: Help!
    data_manager_conf.xml
        column values: ${path}, ${path}
        target: ${GALAXY_DATA_INDEX_DIR}/${path}
    tool_dependencies.xml

The script reads the JSON parameter file Galaxy hands to a data manager,
copies / downloads / symlinks the selected FASTA into the job's
``extra_files_path`` and overwrites the parameter file with the resulting
data table entries.

NOTE(review): nothing in this script invokes ``novoindex``; it only stages
the FASTA. Confirm whether the index build is expected to happen here.
"""
# import dict as dict
import os
import shutil
import optparse
import logging

try:
    import urllib2
except ImportError:  # Python 3: same urlopen() API lives in urllib.request
    import urllib.request as urllib2

from json import loads, dumps

log = logging.getLogger(__name__)

# Number of bytes read from an input stream per iteration.
CHUNK_SIZE = 2 ** 20

# Table name used when the caller does not supply one; matches the value the
# tool wrapper passes on the command line (--data_table_name "novo_index").
DEFAULT_DATA_TABLE_NAME = 'novo_index'


def cleanup_before_exit(tmp_dir):
    """Remove *tmp_dir* and everything below it, if it exists."""
    if tmp_dir and os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)


def _resolve_sequence_id(params, sequence_id=None, dbkey=None):
    """Return the identifier used to name the staged FASTA file.

    An explicit *sequence_id* wins; otherwise the value is looked up in
    ``params['param_dict']``.

    NOTE(review): the tool XML's parameter names are not visible here; this
    assumes the form exposes ``sequence_id`` and/or ``dbkey`` -- confirm.

    Raises:
        ValueError: if no identifier can be determined.
    """
    if sequence_id:
        return sequence_id
    param_dict = params.get('param_dict', {})
    candidates = (param_dict.get('sequence_id'), dbkey, param_dict.get('dbkey'))
    for candidate in candidates:
        if candidate:
            return candidate
    raise ValueError(
        "Cannot determine a sequence id: expected 'sequence_id' or 'dbkey' "
        "in params['param_dict']"
    )


def _add_data_table_entry(data_manager_dict, data_table_entry,
                          data_table_name=DEFAULT_DATA_TABLE_NAME):
    """Append *data_table_entry* to ``data_manager_dict['data_tables'][name]``.

    Missing intermediate containers are created. Returns *data_manager_dict*.
    """
    data_tables = data_manager_dict.setdefault('data_tables', {})
    data_tables.setdefault(data_table_name, []).append(data_table_entry)
    return data_manager_dict


def _open_fasta_inputs(input_filename):
    """Open one path, or a list of paths, for binary reading."""
    if isinstance(input_filename, list):
        return [open(filename, 'rb') for filename in input_filename]
    # Binary mode so the data matches the binary output handle.
    return open(input_filename, 'rb')


def _stream_fasta_to_file(fasta_stream, target_directory, params,
                          close_stream=True, sequence_id=None):
    """Concatenate one or more binary streams into ``<sequence_id>.fa``.

    Args:
        fasta_stream: a readable binary file-like object, or a list of them.
        target_directory: existing directory that receives the FASTA file.
        params: parsed data manager parameters (used to find the sequence id
            when *sequence_id* is not given).
        close_stream: close every input stream when done (also on error).
        sequence_id: optional explicit base name for the output file.

    Returns:
        ``dict(path=<base filename>)`` -- the data table entry.
    """
    fasta_base_filename = "%s.fa" % _resolve_sequence_id(params, sequence_id)
    fasta_filename = os.path.join(target_directory, fasta_base_filename)

    if isinstance(fasta_stream, list):
        streams = fasta_stream
    else:
        streams = [fasta_stream]

    last_char = None
    try:
        with open(fasta_filename, 'wb') as fasta_writer:
            for stream in streams:
                # Make sure records from consecutive inputs do not run
                # together when the previous input lacked a final newline.
                if last_char not in (None, b'\n', b'\r'):
                    fasta_writer.write(b'\n')
                while True:
                    data = stream.read(CHUNK_SIZE)
                    if not data:
                        break
                    fasta_writer.write(data)
                    # Slice (not index) so this is a bytes object on
                    # Python 3 as well as Python 2.
                    last_char = data[-1:]
    finally:
        if close_stream:
            for stream in streams:
                stream.close()

    return dict(path=fasta_base_filename)


def download_from_url(data_manager_dict, params, target_directory,
                      dbkey=None, sequence_id=None, sequence_name=None,
                      data_table_name=DEFAULT_DATA_TABLE_NAME):
    """Fetch the newline-separated URLs in ``user_url`` into one FASTA file.

    *dbkey*, *sequence_id* and *sequence_name* are optional so the function
    can be dispatched with the same three positional arguments as the other
    sources; *sequence_name* is accepted for compatibility and unused.
    """
    # TODO: we should automatically do decompression here
    user_url = params['param_dict']['reference_source']['user_url']
    urls = [line.strip() for line in user_url.split('\n') if line.strip()]
    fasta_reader = [urllib2.urlopen(url) for url in urls]

    data_table_entry = _stream_fasta_to_file(
        fasta_reader, target_directory, params,
        sequence_id=_resolve_sequence_id(params, sequence_id, dbkey))
    _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name)


def download_from_history(data_manager_dict, params, target_directory,
                          data_table_name=DEFAULT_DATA_TABLE_NAME):
    """Copy the history dataset(s) named by ``input_fasta`` into the target."""
    # TODO: allow multiple FASTA input files
    input_filename = params['param_dict']['reference_source']['input_fasta']
    fasta_reader = _open_fasta_inputs(input_filename)

    data_table_entry = _stream_fasta_to_file(
        fasta_reader, target_directory, params)
    _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name)


def copy_from_directory(data_manager_dict, params, target_directory,
                        data_table_name=DEFAULT_DATA_TABLE_NAME):
    """Copy or symlink the server-side file(s) named by ``fasta_filename``."""
    reference_source = params['param_dict']['reference_source']
    input_filename = reference_source['fasta_filename']
    create_symlink = reference_source['create_symlink'] == 'create_symlink'
    if create_symlink:
        data_table_entry = _create_symlink(
            input_filename, target_directory,
            sequence_id=_resolve_sequence_id(params))
    else:
        fasta_reader = _open_fasta_inputs(input_filename)
        data_table_entry = _stream_fasta_to_file(
            fasta_reader, target_directory, params)
    _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name)


def _create_symlink(input_filename, target_directory, sequence_id=None):
    """Symlink *input_filename* into *target_directory* as ``<id>.fa``.

    When *sequence_id* is not given the link is named after the input file's
    base name (without extension).

    Returns:
        ``dict(path=<base filename>)`` -- the data table entry.
    """
    if not sequence_id:
        sequence_id = os.path.splitext(os.path.basename(input_filename))[0]
    fasta_base_filename = "%s.fa" % sequence_id
    fasta_filename = os.path.join(target_directory, fasta_base_filename)
    os.symlink(input_filename, fasta_filename)
    return dict(path=fasta_base_filename)


REFERENCE_SOURCE_TO_DOWNLOAD = dict(
    url=download_from_url,
    history=download_from_history,
    directory=copy_from_directory,
)


def main(argv=None):
    """Command line entry point.

    Args:
        argv: argument list to parse; defaults to ``sys.argv[1:]``.

    The single positional argument is the JSON parameter file written by
    Galaxy; it is overwritten with the JSON-encoded data manager result.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-d', '--data_table_name',
                      default=DEFAULT_DATA_TABLE_NAME)
    (options, args) = parser.parse_args(argv)

    if not args:
        parser.error('missing path to the data manager JSON parameter file')
    filename = args[0]

    with open(filename) as handle:
        params = loads(handle.read())
    target_directory = params['output_data'][0]['extra_files_path']
    if not os.path.isdir(target_directory):
        os.makedirs(target_directory)
    data_manager_dict = {}

    # Fetch the FASTA
    reference_source = params['param_dict']['reference_source']
    selector = reference_source['reference_source_selector']
    try:
        fetch = REFERENCE_SOURCE_TO_DOWNLOAD[selector]
    except KeyError:
        raise ValueError('Unknown reference source: %r' % (selector,))
    fetch(data_manager_dict, params, target_directory,
          data_table_name=options.data_table_name)

    # save info to json file
    with open(filename, 'w') as handle:
        handle.write(dumps(data_manager_dict))


if __name__ == "__main__":
    main()