Mercurial > repos > jeremyjliu > region_motif_data_manager
comparison data_manager/data_manager_fetch_motifs.py @ 10:a5421f83f972 draft
Uploaded
author | jeremyjliu |
---|---|
date | Thu, 26 Feb 2015 22:52:42 -0500 |
parents | e4229c66d78d |
children | 24a2c2783fb8 |
comparison
equal
deleted
inserted
replaced
9:e4229c66d78d | 10:a5421f83f972 |
---|---|
21 CHUNK_SIZE = 2**20 #1mb | 21 CHUNK_SIZE = 2**20 #1mb |
22 | 22 |
23 def download_motif_databases( data_manager_dict, params, target_directory, motif_db ): | 23 def download_motif_databases( data_manager_dict, params, target_directory, motif_db ): |
24 | 24 |
25 # Select download URL, file name, data table name, and path using motif_db selector variable | 25 # Select download URL, file name, data table name, and path using motif_db selector variable |
26 if motif_db == "pouya": | 26 if motif_db == "encode": |
27 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs.bed.bgz', | 27 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs.bed.bgz', |
28 "pouya_motifs.bed.bgz", "pouya_bgz", "Pouya Encode Motifs (hg19) BGZ"] | 28 "pouya_motifs.bed.bgz", "encode_bgz", "Encode Motifs (hg19) BGZ"] |
29 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs.bed.bgz.tbi', | 29 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs.bed.bgz.tbi', |
30 "pouya_motifs.bed.bgz.tbi", "pouya_tbi", "Pouya Encode Motifs (hg19) TBI"] | 30 "pouya_motifs.bed.bgz.tbi", "encode_tbi", "Encode Motifs (hg19) TBI"] |
31 PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/pouya.pwms.from.seq.RData', | |
32 "pouya.pwms.from.seq.RData", "encode_pwm", "Encode Motifs (hg19) PWM"] | |
31 elif motif_db == "jaspar": | 33 elif motif_db == "jaspar": |
32 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz', | 34 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz', |
33 "jaspar_jolma_motifs.bed.bgz", "jaspar_bgz", "Jaspar and Jolma Motifs (hg19) BGZ"] | 35 "jaspar_jolma_motifs.bed.bgz", "jaspar_bgz", "Jaspar and Jolma Motifs (hg19) BGZ"] |
34 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz.tbi', | 36 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz.tbi', |
35 "jaspar_jolma_motifs.bed.bgz.tbi", "jaspar_tbi", "Jaspar and Jolma Motifs (hg19) TBI"] | 37 "jaspar_jolma_motifs.bed.bgz.tbi", "jaspar_tbi", "Jaspar and Jolma Motifs (hg19) TBI"] |
38 PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/jaspar.jolma.pwms.from.seq.RData', | |
39 "jaspar.jolma.pwms.from.seq.RData", "jaspar_pwm", "Jaspar and Jolma Motifs (hg19) PWM"] | |
36 elif motif_db == "mouse": | 40 elif motif_db == "mouse": |
37 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz', | 41 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz', |
38 "mm9_motifs_split.bed.bgz", "mouse_bgz", "Mouse Motifs (mm9) BGZ"] | 42 "mm9_motifs_split.bed.bgz", "mouse_bgz", "Mouse Motifs (mm9) BGZ"] |
39 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz.tbi', | 43 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz.tbi', |
40 "mm9_motifs_split.bed.bgz.tbi", "mouse_tbi", "Mouse Motifs (mm9) TBI"] | 44 "mm9_motifs_split.bed.bgz.tbi", "mouse_tbi", "Mouse Motifs (mm9) TBI"] |
45 PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/mm9.pwms.from.seq.RData', | |
46 "mm9.pwms.from.seq.RData", "mouse_pwm", "Mouse Motifs (mm9) PWM"] | |
41 else: | 47 else: |
42 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz', | 48 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz', |
43 "pouya_test_motifs.bed.bgz", "test_bgz", "Test Pouya Subset (hg19) BGZ"] | 49 "pouya_test_motifs.bed.bgz", "test_bgz", "Test Encode Motifs (hg19) BGZ"] |
44 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz.tbi', | 50 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz.tbi', |
45 "pouya_test_motifs.bed.bgz.tbi", "test_tbi", "Test Pouya Subset (hg19) TBI"] | 51 "pouya_test_motifs.bed.bgz.tbi", "test_tbi", "Test Encode Motifs (hg19) TBI"] |
52 PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/pouya.pwms.from.seq.RData', | |
53 "pouya.pwms.from.seq.RData", "test_pwm", "Test Encode Motifs (hg19) PWM"] | |
54 | |
46 | 55 |
47 # Save and add motif bgz file to motif_databases data table | 56 # Save and add motif bgz file to motif_databases data table |
48 bgz_reader = urllib2.urlopen( BGZ[0] ) | 57 bgz_reader = urllib2.urlopen( BGZ[0] ) |
49 bgz_data_table_entry = _stream_fasta_to_file( bgz_reader, target_directory, params, | 58 bgz_data_table_entry = _stream_fasta_to_file( bgz_reader, target_directory, params, |
50 BGZ[1], BGZ[2], BGZ[3] ) | 59 BGZ[1], BGZ[2], BGZ[3] ) |
52 | 61 |
53 # Save and add motif tbi file to motif_databases data table | 62 # Save and add motif tbi file to motif_databases data table |
54 tbi_reader = urllib2.urlopen( TBI[0] ) | 63 tbi_reader = urllib2.urlopen( TBI[0] ) |
55 tbi_data_table_entry = _stream_fasta_to_file( tbi_reader, target_directory, params, | 64 tbi_data_table_entry = _stream_fasta_to_file( tbi_reader, target_directory, params, |
56 TBI[1], TBI[2], TBI[3] ) | 65 TBI[1], TBI[2], TBI[3] ) |
66 _add_data_table_entry( data_manager_dict, 'motif_databases', tbi_data_table_entry ) | |
67 | |
68 # Save and add motif pwm file to motif_databases data table | |
69 tbi_reader = urllib2.urlopen( PWM[0] ) | |
70 tbi_data_table_entry = _stream_fasta_to_file( tbi_reader, target_directory, params, | |
71 PWM[1], PWM[2], PWM[3] ) | |
57 _add_data_table_entry( data_manager_dict, 'motif_databases', tbi_data_table_entry ) | 72 _add_data_table_entry( data_manager_dict, 'motif_databases', tbi_data_table_entry ) |
58 | 73 |
59 def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ): | 74 def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ): |
60 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 75 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) |
61 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] ) | 76 data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] ) |