annotate data_manager/data_manager_fetch_motifs.py @ 4:75d825e1b00d draft

Uploaded
author jeremyjliu
date Wed, 28 Jan 2015 22:39:46 -0500
parents d5faf2b51b07
children 6621a6ac8bb4
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
1 #!/usr/bin/env python
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
2 #Dan Blankenberg
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
3
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
4 import sys
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
5 import os
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
6 import tempfile
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
7 import shutil
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
8 import optparse
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
9 import urllib2
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
10 #import uuid
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
11 from ftplib import FTP
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
12 import tarfile
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
13 import zipfile
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
14 import gzip
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
15 import bz2
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
16
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
17 from galaxy.util.json import from_json_string, to_json_string
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
18
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
# Number of bytes requested per read() call when copying a download to disk.
CHUNK_SIZE = 2**20 #1 MiB
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
20
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
def download_motif_databases( data_manager_dict, params, target_directory, motif_db ):
    """Download the test motif database plus its index and register both.

    Each file is fetched with urllib2, streamed into target_directory by
    _stream_fasta_to_file, and the returned entry is appended to the
    'motif_databases' data table inside data_manager_dict.

    data_manager_dict -- dict that accumulates the data table entries; mutated in place
    params            -- parsed parameter dict, passed through to _stream_fasta_to_file
    target_directory  -- directory the downloaded files are written to
    motif_db          -- requested database identifier

    NOTE(review): motif_db is currently NOT consulted; the test database is
    always downloaded because the URLs of the other databases are still empty
    placeholders. Select on motif_db once those URLs are filled in.
    """
    TEST_BGZ_URL = 'http://gehlenborg.com/wp-content/uploads/motif/pouya_test_motifs.bed.bgz'
    TEST_TBI_URL = 'http://gehlenborg.com/wp-content/uploads/motif/pouya_test_motifs.bed.bgz.tbi'
    # Placeholders for databases that have no download location yet.
    POUYA_BGZ_URL = ''
    POUYA_TBI_URL = ''
    JOLMA_BGZ_URL = ''
    JOLMA_TBI_URL = ''
    MM9_BGZ_URL = ''
    MM9_TBI_URL = ''

    # ( url, data table value, filename on disk ). The index gets its own
    # ".tbi" filename so that it no longer overwrites the archive it belongs to.
    downloads = (
        ( TEST_BGZ_URL, "test_bgz", "pouya_test_motifs.bed.bgz" ),
        ( TEST_TBI_URL, "test_tbi", "pouya_test_motifs.bed.bgz.tbi" ),
    )

    for url, value, base_filename in downloads:
        reader = urllib2.urlopen( url )
        # Keyword arguments keep this call independent of the helper's
        # positional order. The original call supplied no display name, so the
        # value is reused as the name -- TODO confirm the desired label.
        data_table_entry = _stream_fasta_to_file( reader, target_directory, params,
                                                  fasta_base_filename=base_filename,
                                                  value=value, name=value )
        _add_data_table_entry( data_manager_dict, 'motif_databases', data_table_entry )
0
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
40
2
d5faf2b51b07 Uploaded
jeremyjliu
parents: 1
diff changeset
def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ):
    """Append data_table_entry to the named table inside data_manager_dict.

    The 'data_tables' mapping and the list for data_table are created on first
    use. data_manager_dict is mutated in place and also returned.

    data_manager_dict -- dict holding (or about to hold) a 'data_tables' mapping
    data_table        -- name of the table to append to
    data_table_entry  -- the entry to append
    """
    tables = data_manager_dict.setdefault( 'data_tables', {} )
    tables.setdefault( data_table, [] ).append( data_table_entry )
    return data_manager_dict
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
46
4
75d825e1b00d Uploaded
jeremyjliu
parents: 2
diff changeset
def _stream_fasta_to_file( fasta_stream, target_directory, params, fasta_base_filename,
                           value, name=None, close_stream=True ):
    """Copy fasta_stream into target_directory and return a data table entry.

    The stream is read in CHUNK_SIZE pieces, so the payload is never held in
    memory as a whole.

    fasta_stream        -- object with read( size ) and close() methods
    target_directory    -- existing directory the file is written into
    params              -- parameter dict; accepted for interface compatibility, unused here
    fasta_base_filename -- name of the file to create inside target_directory
    value               -- 'value' field of the returned entry
    name                -- 'name' field of the returned entry; defaults to value
    close_stream        -- close fasta_stream when done (also on error)

    Returns dict( value=..., name=..., path=fasta_base_filename ); note that
    path is the bare filename, not the joined path.

    Changes from the previous revision:
    * close_stream was declared before the required parameters, which is a
      SyntaxError; it is now the last parameter.
    * fasta_base_filename is honoured instead of being overwritten by a
      hard-coded name, so several files can be stored side by side.
    * close_stream is honoured, and both handles are released even when a
      read or write raises.
    """
    if name is None:
        name = value

    fasta_filename = os.path.join( target_directory, fasta_base_filename )
    try:
        # Write-only binary mode is enough; the file is never read back here.
        with open( fasta_filename, 'wb' ) as fasta_writer:
            while True:
                chunk = fasta_stream.read( CHUNK_SIZE )
                if not chunk:
                    break
                fasta_writer.write( chunk )
    finally:
        if close_stream:
            fasta_stream.close()

    return dict( value=value, name=name, path=fasta_base_filename )
0
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
64
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
def main():
    """Command line entry point.

    Reads the JSON parameter file named by the first positional argument,
    creates the directory given by its output_data[0]['extra_files_path'],
    downloads the motif database files into it, and then overwrites the same
    parameter file with the resulting data manager dictionary as JSON.

    Options:
      -m / --motif_db  identifier of the motif database, forwarded to
                       download_motif_databases
    """
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option( '-m', '--motif_db', dest='motif_db', action='store', type="string", default=None, help='motif_db' )
    (options, args) = parser.parse_args()

    # Exit with a usage message instead of an IndexError traceback.
    if not args:
        parser.error( 'the path of the JSON parameter file is required' )
    filename = args[0]

    with open( filename ) as params_file:
        params = from_json_string( params_file.read() )
    target_directory = params[ 'output_data' ][0]['extra_files_path']
    # Tolerate a directory left behind by an earlier run.
    if not os.path.isdir( target_directory ):
        os.mkdir( target_directory )
    data_manager_dict = {}

    #Fetch the Motif Database
    download_motif_databases( data_manager_dict, params, target_directory, options.motif_db )

    #save info to json file
    # JSON is text, so the file is opened in text mode and closed deterministically.
    with open( filename, 'w' ) as json_file:
        json_file.write( to_json_string( data_manager_dict ) )
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
83
ba049ce65693 Initial upload
jeremyjliu
parents:
diff changeset
# Run the data manager only when executed as a script, not when imported.
if __name__ == "__main__":
    main()