0
|
1 #!/usr/bin/env python
|
|
2 #Dan Blankenberg
|
|
3
|
|
4 import sys
|
|
5 import os
|
|
6 import tempfile
|
|
7 import shutil
|
|
8 import optparse
|
|
9 import urllib2
|
|
10 #import uuid
|
|
11 from ftplib import FTP
|
|
12 import tarfile
|
|
13 import zipfile
|
|
14 import gzip
|
|
15 import bz2
|
|
16
|
|
17 from galaxy.util.json import from_json_string, to_json_string
|
|
18
|
|
# Number of bytes requested per read while streaming a download to disk (1 MiB).
CHUNK_SIZE = 1 << 20
|
|
20
|
|
def download_motif_databases( data_manager_dict, params, target_directory, motif_db ):
    """Download the test motif files and register them in the data manager dict.

    Fetches the test ``.bed.bgz`` file and its ``.bed.bgz.tbi`` companion over
    HTTP, writes each one into ``target_directory`` and appends one entry per
    file to the 'motif_databases' data table of ``data_manager_dict``.

    ``params`` is only forwarded to ``_stream_fasta_to_file``. ``motif_db`` is
    accepted but not consulted: only the TEST_* URLs below are downloaded.
    Returns nothing; ``data_manager_dict`` is updated in place.
    """
    TEST_BGZ_URL = 'http://gehlenborg.com/wp-content/uploads/motif/pouya_test_motifs.bed.bgz'
    TEST_TBI_URL = 'http://gehlenborg.com/wp-content/uploads/motif/pouya_test_motifs.bed.bgz.tbi'
    #Empty placeholders, not referenced anywhere in this function.
    POUYA_BGZ_URL = ''
    POUYA_TBI_URL = ''
    JOLMA_BGZ_URL = ''
    JOLMA_TBI_URL = ''
    MM9_BGZ_URL = ''
    MM9_TBI_URL = ''

    #One row per file: ( url, file name on disk, data table value, display name )
    downloads = (
        ( TEST_BGZ_URL, "pouya_test_motifs.bed.bgz", "test_bgz", "Test Pouya Subset BGZ (hg19)" ),
        ( TEST_TBI_URL, "pouya_test_motifs.bed.bgz.tbi", "test_tbi", "Test Pouya Subset TBI (hg19)" ),
    )

    for url, base_filename, entry_value, entry_name in downloads:
        reader = urllib2.urlopen( url )
        table_entry = _stream_fasta_to_file( reader, target_directory, params,
                                             base_filename, entry_value, entry_name )
        _add_data_table_entry( data_manager_dict, 'motif_databases', table_entry )
|
0
|
40
|
2
|
41 def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ):
|
0
|
42 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
|
2
|
43 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] )
|
|
44 data_manager_dict['data_tables'][data_table].append( data_table_entry )
|
0
|
45 return data_manager_dict
|
|
46
|
5
|
def _stream_fasta_to_file( fasta_stream, target_directory, params,
                           fasta_base_filename, value, name, close_stream=True ):
    """Copy a readable stream into a file inside ``target_directory``.

    The data is copied verbatim in CHUNK_SIZE pieces; despite the function
    name, nothing here parses or validates the content.

    Parameters:
        fasta_stream: object with ``read( size )`` and ``close()`` methods.
        target_directory: directory the output file is written into.
        params: unused by this function; kept for interface compatibility.
        fasta_base_filename: name of the output file within target_directory.
        value: stored under 'value' in the returned entry.
        name: stored under 'name' in the returned entry.
        close_stream: when true (the default) ``fasta_stream`` is closed before
            returning, including when the copy fails. When false the stream is
            left open for the caller.

    Returns:
        dict with keys 'value', 'name' and 'path'. 'path' is
        ``fasta_base_filename`` as given, not joined with target_directory.
    """
    fasta_filename = os.path.join( target_directory, fasta_base_filename )
    try:
        #The with-block guarantees the output file is closed even if a read or write fails.
        with open( fasta_filename, 'wb' ) as fasta_writer:
            shutil.copyfileobj( fasta_stream, fasta_writer, CHUNK_SIZE )
    finally:
        #Honor the caller's choice; previously the stream was closed unconditionally.
        if close_stream:
            fasta_stream.close()

    return dict( value=value, name=name, path=fasta_base_filename )
|
0
|
63
|
|
def main():
    """Command-line entry point.

    Expects one positional argument: the path of a JSON file. The file is read
    for its parameters, the directory named by
    ``params['output_data'][0]['extra_files_path']`` is created, the motif
    files are downloaded into it, and the same JSON file is then overwritten
    with the resulting data manager dict.

    Exits with a usage error when the positional argument is missing.
    ``os.mkdir`` raises OSError if the target directory already exists.
    """
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option( '-m', '--motif_db', dest='motif_db', action='store', type="string", default=None, help='motif_db' )
    (options, args) = parser.parse_args()

    if not args:
        #Report a usage error instead of failing with an IndexError on args[0].
        parser.error( 'missing path to the JSON parameter file' )
    filename = args[0]

    #Read the parameters and close the handle right away.
    with open( filename ) as params_file:
        params = from_json_string( params_file.read() )
    target_directory = params[ 'output_data' ][0]['extra_files_path']
    os.mkdir( target_directory )
    data_manager_dict = {}

    #Fetch the Motif Database
    download_motif_databases( data_manager_dict, params, target_directory, options.motif_db )

    #save info to json file
    with open( filename, 'wb' ) as json_file:
        json_file.write( to_json_string( data_manager_dict ) )
|
|
82
|
|
#Run the data manager only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|