comparison data_manager/data_manager_fetch_motifs.py @ 10:a5421f83f972 draft

Uploaded
author jeremyjliu
date Thu, 26 Feb 2015 22:52:42 -0500
parents e4229c66d78d
children 24a2c2783fb8
comparison
equal deleted inserted replaced
9:e4229c66d78d 10:a5421f83f972
21 CHUNK_SIZE = 2**20 #1mb 21 CHUNK_SIZE = 2**20 #1mb
22 22
23 def download_motif_databases( data_manager_dict, params, target_directory, motif_db ): 23 def download_motif_databases( data_manager_dict, params, target_directory, motif_db ):
24 24
25 # Select download URL, file name, data table name, and path using motif_db selector variable 25 # Select download URL, file name, data table name, and path using motif_db selector variable
26 if motif_db == "pouya": 26 if motif_db == "encode":
27 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs.bed.bgz', 27 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs.bed.bgz',
28 "pouya_motifs.bed.bgz", "pouya_bgz", "Pouya Encode Motifs (hg19) BGZ"] 28 "pouya_motifs.bed.bgz", "encode_bgz", "Encode Motifs (hg19) BGZ"]
29 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs.bed.bgz.tbi', 29 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs.bed.bgz.tbi',
30 "pouya_motifs.bed.bgz.tbi", "pouya_tbi", "Pouya Encode Motifs (hg19) TBI"] 30 "pouya_motifs.bed.bgz.tbi", "encode_tbi", "Encode Motifs (hg19) TBI"]
31 PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/pouya.pwms.from.seq.RData',
32 "pouya.pwms.from.seq.RData", "encode_pwm", "Encode Motifs (hg19) PWM"]
31 elif motif_db == "jaspar": 33 elif motif_db == "jaspar":
32 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz', 34 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz',
33 "jaspar_jolma_motifs.bed.bgz", "jaspar_bgz", "Jaspar and Jolma Motifs (hg19) BGZ"] 35 "jaspar_jolma_motifs.bed.bgz", "jaspar_bgz", "Jaspar and Jolma Motifs (hg19) BGZ"]
34 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz.tbi', 36 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz.tbi',
35 "jaspar_jolma_motifs.bed.bgz.tbi", "jaspar_tbi", "Jaspar and Jolma Motifs (hg19) TBI"] 37 "jaspar_jolma_motifs.bed.bgz.tbi", "jaspar_tbi", "Jaspar and Jolma Motifs (hg19) TBI"]
38 PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/jaspar.jolma.pwms.from.seq.RData',
39 "jaspar.jolma.pwms.from.seq.RData", "jaspar_pwm", "Jaspar and Jolma Motifs (hg19) PWM"]
36 elif motif_db == "mouse": 40 elif motif_db == "mouse":
37 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz', 41 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz',
38 "mm9_motifs_split.bed.bgz", "mouse_bgz", "Mouse Motifs (mm9) BGZ"] 42 "mm9_motifs_split.bed.bgz", "mouse_bgz", "Mouse Motifs (mm9) BGZ"]
39 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz.tbi', 43 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz.tbi',
40 "mm9_motifs_split.bed.bgz.tbi", "mouse_tbi", "Mouse Motifs (mm9) TBI"] 44 "mm9_motifs_split.bed.bgz.tbi", "mouse_tbi", "Mouse Motifs (mm9) TBI"]
45 PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/mm9.pwms.from.seq.RData',
46 "mm9.pwms.from.seq.RData", "mouse_pwm", "Mouse Motifs (mm9) PWM"]
41 else: 47 else:
42 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz', 48 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz',
43 "pouya_test_motifs.bed.bgz", "test_bgz", "Test Pouya Subset (hg19) BGZ"] 49 "pouya_test_motifs.bed.bgz", "test_bgz", "Test Encode Motifs (hg19) BGZ"]
44 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz.tbi', 50 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz.tbi',
45 "pouya_test_motifs.bed.bgz.tbi", "test_tbi", "Test Pouya Subset (hg19) TBI"] 51 "pouya_test_motifs.bed.bgz.tbi", "test_tbi", "Test Encode Motifs (hg19) TBI"]
52 PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/pouya.pwms.from.seq.RData',
53 "pouya.pwms.from.seq.RData", "test_pwm", "Test Encode Motifs (hg19) PWM"]
54
46 55
47 # Save and add motif bgz file to motif_databases data table 56 # Save and add motif bgz file to motif_databases data table
48 bgz_reader = urllib2.urlopen( BGZ[0] ) 57 bgz_reader = urllib2.urlopen( BGZ[0] )
49 bgz_data_table_entry = _stream_fasta_to_file( bgz_reader, target_directory, params, 58 bgz_data_table_entry = _stream_fasta_to_file( bgz_reader, target_directory, params,
50 BGZ[1], BGZ[2], BGZ[3] ) 59 BGZ[1], BGZ[2], BGZ[3] )
52 61
53 # Save and add motif tbi file to motif_databases data table 62 # Save and add motif tbi file to motif_databases data table
54 tbi_reader = urllib2.urlopen( TBI[0] ) 63 tbi_reader = urllib2.urlopen( TBI[0] )
55 tbi_data_table_entry = _stream_fasta_to_file( tbi_reader, target_directory, params, 64 tbi_data_table_entry = _stream_fasta_to_file( tbi_reader, target_directory, params,
56 TBI[1], TBI[2], TBI[3] ) 65 TBI[1], TBI[2], TBI[3] )
66 _add_data_table_entry( data_manager_dict, 'motif_databases', tbi_data_table_entry )
67
68 # Save and add motif pwm file to motif_databases data table
69 tbi_reader = urllib2.urlopen( PWM[0] )
70 tbi_data_table_entry = _stream_fasta_to_file( tbi_reader, target_directory, params,
71 PWM[1], PWM[2], PWM[3] )
57 _add_data_table_entry( data_manager_dict, 'motif_databases', tbi_data_table_entry ) 72 _add_data_table_entry( data_manager_dict, 'motif_databases', tbi_data_table_entry )
58 73
59 def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ): 74 def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ):
60 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) 75 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} )
61 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] ) 76 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] )