comparison: data_manager/novoalign_index_builder.py @ 0:85fbd52dbb36 (draft)
planemo upload for repository https://github.com/zipho/data_manager_novoalign_index_builder commit d51fdc6291de173e829a839e98c6c3ae367d84bf
author   | sanbi-uwc
date     | Thu, 03 Mar 2016 05:59:41 -0500
parents  |
children | e51fb8188ed9
comparing -1:000000000000 to 0:85fbd52dbb36
#!/usr/bin/env python
# Z. Mashologu (SANBI-UWC)
import logging
import optparse
import os
import shutil
import urllib2

from json import loads, dumps

log = logging.getLogger( __name__ )

CHUNK_SIZE = 2 ** 20  # stream FASTA data in 1 MB chunks

def cleanup_before_exit( tmp_dir ):
    # Remove the temporary working directory, if one was created.
    if tmp_dir and os.path.exists( tmp_dir ):
        shutil.rmtree( tmp_dir )

def _get_sequence_id( params ):
    # The FASTA file is named after the sequence_id expected in the tool's
    # param_dict; fall back to the dbkey (or a generic name) when it is absent.
    param_dict = params['param_dict']
    return param_dict.get( 'sequence_id' ) or param_dict.get( 'dbkey', 'sequence' )

def _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry ):
    # Register the new entry under the named tool data table in the
    # dictionary that is written back to Galaxy as JSON.
    data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
    data_manager_dict['data_tables'][ data_table_name ] = data_manager_dict['data_tables'].get( data_table_name, [] )
    data_manager_dict['data_tables'][ data_table_name ].append( data_table_entry )
    return data_manager_dict

def _stream_fasta_to_file( fasta_stream, target_directory, params, close_stream=True ):
    fasta_base_filename = "%s.fa" % _get_sequence_id( params )
    fasta_filename = os.path.join( target_directory, fasta_base_filename )
    fasta_writer = open( fasta_filename, 'wb+' )

    if isinstance( fasta_stream, list ) and len( fasta_stream ) == 1:
        fasta_stream = fasta_stream[0]

    if isinstance( fasta_stream, list ):
        # Concatenate several input streams, making sure each one ends with a newline.
        last_char = None
        for fh in fasta_stream:
            if last_char not in [ None, '\n', '\r' ]:
                fasta_writer.write( '\n' )
            while True:
                data = fh.read( CHUNK_SIZE )
                if data:
                    fasta_writer.write( data )
                    last_char = data[-1]
                else:
                    break
            if close_stream:
                fh.close()
    else:
        while True:
            data = fasta_stream.read( CHUNK_SIZE )
            if data:
                fasta_writer.write( data )
            else:
                break
        if close_stream:
            fasta_stream.close()

    fasta_writer.close()

    return dict( path=fasta_base_filename )

def download_from_url( data_manager_dict, params, target_directory, data_table_name ):
    #TODO: we should automatically do decompression here
    urls = [ url.strip() for url in params['param_dict']['reference_source']['user_url'].split( '\n' ) if url.strip() ]
    fasta_reader = [ urllib2.urlopen( url ) for url in urls ]

    data_table_entry = _stream_fasta_to_file( fasta_reader, target_directory, params )
    _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry )

def download_from_history( data_manager_dict, params, target_directory, data_table_name ):
    #TODO: allow multiple FASTA input files
    input_filename = params['param_dict']['reference_source']['input_fasta']
    if isinstance( input_filename, list ):
        fasta_reader = [ open( filename, 'rb' ) for filename in input_filename ]
    else:
        fasta_reader = open( input_filename, 'rb' )

    data_table_entry = _stream_fasta_to_file( fasta_reader, target_directory, params )
    _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry )

def copy_from_directory( data_manager_dict, params, target_directory, data_table_name ):
    input_filename = params['param_dict']['reference_source']['fasta_filename']
    create_symlink = params['param_dict']['reference_source']['create_symlink'] == 'create_symlink'
    if create_symlink:
        data_table_entry = _create_symlink( input_filename, target_directory, params )
    else:
        if isinstance( input_filename, list ):
            fasta_reader = [ open( filename, 'rb' ) for filename in input_filename ]
        else:
            fasta_reader = open( input_filename, 'rb' )
        data_table_entry = _stream_fasta_to_file( fasta_reader, target_directory, params )
    _add_data_table_entry( data_manager_dict, data_table_name, data_table_entry )

def _create_symlink( input_filename, target_directory, params ):
    fasta_base_filename = "%s.fa" % _get_sequence_id( params )
    fasta_filename = os.path.join( target_directory, fasta_base_filename )
    os.symlink( input_filename, fasta_filename )
    return dict( path=fasta_base_filename )

REFERENCE_SOURCE_TO_DOWNLOAD = dict( url=download_from_url, history=download_from_history, directory=copy_from_directory )

def main():
    # Parse the command line: the single positional argument is the JSON
    # parameter file Galaxy hands to data manager tools.
    parser = optparse.OptionParser()
    parser.add_option( '-d', '--data_table_name', dest='data_table_name', help='data table to add the new entry to' )
    (options, args) = parser.parse_args()

    filename = args[0]

    params = loads( open( filename ).read() )
    target_directory = params['output_data'][0]['extra_files_path']
    os.mkdir( target_directory )
    data_manager_dict = {}

    # Fetch the FASTA from the selected reference source
    REFERENCE_SOURCE_TO_DOWNLOAD[ params['param_dict']['reference_source']['reference_source_selector'] ]( data_manager_dict, params, target_directory, options.data_table_name )

    # Save the data table entries back to the JSON file for Galaxy to pick up
    open( filename, 'wb' ).write( dumps( data_manager_dict ) )

if __name__ == "__main__":
    main()
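
For reference, the sketch below shows one way to exercise the script outside Galaxy. It is a minimal harness, not part of the repository: it writes a parameter JSON with the keys the script reads (param_dict, reference_source, reference_source_selector, output_data, extra_files_path) and then invokes the builder on it. The concrete values, the local paths and the novoalign_indexes table name are illustrative assumptions.

# Illustrative local harness only: the field values, file paths and the
# data table name below are assumptions, not taken from the repository.
import json
import os
import subprocess
import tempfile

work_dir = tempfile.mkdtemp()
extra_files_path = os.path.join( work_dir, 'novoalign_index' )  # created by the script itself

params = {
    'param_dict': {
        'sequence_id': 'test_ref',
        'dbkey': 'test_ref',
        'reference_source': {
            'reference_source_selector': 'directory',    # one of: url, history, directory
            'fasta_filename': '/path/to/reference.fa',   # placeholder path
            'create_symlink': 'create_symlink',          # symlink instead of copying
        },
    },
    'output_data': [ { 'extra_files_path': extra_files_path } ],
}

json_filename = os.path.join( work_dir, 'params.json' )
open( json_filename, 'w' ).write( json.dumps( params ) )

# -d names the tool data table the new entry is registered under.
subprocess.check_call( [ 'python', 'data_manager/novoalign_index_builder.py',
                         '-d', 'novoalign_indexes', json_filename ] )

print open( json_filename ).read()  # JSON with the data_tables entries written back

In a real Galaxy run the parameter JSON and the -d value come from the data manager tool's XML wrapper, so a harness like this is only useful for quick local inspection of the output.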