# HG changeset patch # User jeremyjliu # Date 1425009162 18000 # Node ID a5421f83f972b52a67ba9c87fb6e8e4481aed7ea # Parent e4229c66d78d011228f08d1ecf95508ea089f9a4 Uploaded diff -r e4229c66d78d -r a5421f83f972 data_manager/data_manager_fetch_motifs.py --- a/data_manager/data_manager_fetch_motifs.py Sun Feb 01 15:37:11 2015 -0500 +++ b/data_manager/data_manager_fetch_motifs.py Thu Feb 26 22:52:42 2015 -0500 @@ -23,26 +23,35 @@ def download_motif_databases( data_manager_dict, params, target_directory, motif_db ): # Select download URL, file name, data table name, and path using motif_db selector variable - if motif_db == "pouya": + if motif_db == "encode": BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs.bed.bgz', - "pouya_motifs.bed.bgz", "pouya_bgz", "Pouya Encode Motifs (hg19) BGZ"] + "pouya_motifs.bed.bgz", "encode_bgz", "Encode Motifs (hg19) BGZ"] TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs.bed.bgz.tbi', - "pouya_motifs.bed.bgz.tbi", "pouya_tbi", "Pouya Encode Motifs (hg19) TBI"] + "pouya_motifs.bed.bgz.tbi", "encode_tbi", "Encode Motifs (hg19) TBI"] + PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/pouya.pwms.from.seq.RData', + "pouya.pwms.from.seq.RData", "encode_pwm", "Encode Motifs (hg19) PWM"] elif motif_db == "jaspar": BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz', "jaspar_jolma_motifs.bed.bgz", "jaspar_bgz", "Jaspar and Jolma Motifs (hg19) BGZ"] TBI = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz.tbi', "jaspar_jolma_motifs.bed.bgz.tbi", "jaspar_tbi", "Jaspar and Jolma Motifs (hg19) TBI"] + PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/jaspar.jolma.pwms.from.seq.RData', + "jaspar.jolma.pwms.from.seq.RData", "jaspar_pwm", "Jaspar and Jolma Motifs (hg19) PWM"] elif motif_db == "mouse": BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz', "mm9_motifs_split.bed.bgz", "mouse_bgz", "Mouse 
Motifs (mm9) BGZ"] TBI = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz.tbi', "mm9_motifs_split.bed.bgz.tbi", "mouse_tbi", "Mouse Motifs (mm9) TBI"] + PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/mm9.pwms.from.seq.RData', + "mm9.pwms.from.seq.RData", "mouse_pwm", "Mouse Motifs (mm9) PWM"] else: BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz', - "pouya_test_motifs.bed.bgz", "test_bgz", "Test Pouya Subset (hg19) BGZ"] + "pouya_test_motifs.bed.bgz", "test_bgz", "Test Encode Motifs (hg19) BGZ"] TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz.tbi', - "pouya_test_motifs.bed.bgz.tbi", "test_tbi", "Test Pouya Subset (hg19) TBI"] + "pouya_test_motifs.bed.bgz.tbi", "test_tbi", "Test Encode Motifs (hg19) TBI"] + PWM = ['http://compbio.med.harvard.edu/motif-enrichment/pwms/pouya.pwms.from.seq.RData', + "pouya.pwms.from.seq.RData", "test_pwm", "Test Encode Motifs (hg19) PWM"] + # Save and add motif bgz file to motif_databases data table bgz_reader = urllib2.urlopen( BGZ[0] ) @@ -56,6 +65,12 @@ TBI[1], TBI[2], TBI[3] ) _add_data_table_entry( data_manager_dict, 'motif_databases', tbi_data_table_entry ) + # Save and add motif pwm file to motif_databases data table + tbi_reader = urllib2.urlopen( PWM[0] ) + tbi_data_table_entry = _stream_fasta_to_file( tbi_reader, target_directory, params, + PWM[1], PWM[2], PWM[3] ) + _add_data_table_entry( data_manager_dict, 'motif_databases', tbi_data_table_entry ) + def _add_data_table_entry( data_manager_dict, data_table, data_table_entry ): data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) data_manager_dict['data_tables'][data_table] = data_manager_dict['data_tables'].get( data_table, [] ) diff -r e4229c66d78d -r a5421f83f972 data_manager/data_manager_fetch_motifs.xml --- a/data_manager/data_manager_fetch_motifs.xml Sun Feb 01 15:37:11 2015 -0500 +++ b/data_manager/data_manager_fetch_motifs.xml Thu Feb 26 
22:52:42 2015 -0500 @@ -3,8 +3,8 @@ data_manager_fetch_motifs.py "${out_file}" --motif_db ${motif_db_selector} - - + + diff -r e4229c66d78d -r a5421f83f972 tool-data/motif_databases.loc.sample --- a/tool-data/motif_databases.loc.sample Sun Feb 01 15:37:11 2015 -0500 +++ b/tool-data/motif_databases.loc.sample Thu Feb 26 22:52:42 2015 -0500 @@ -5,5 +5,5 @@ # # So, motif_databases.loc could look something like this: # -# test_bgz Test Pouya Subset (hg19) BGZ //tool-data/motifs/pouya_test_motifs.bed.bgz -# test_tbi Test Pouya Subset (hg19) TBI //tool-data/motifs/pouya_test_motifs.bed.bgz.tbi +# test_bgz Test Encode Motifs (hg19) BGZ //tool-data/motifs/pouya_test_motifs.bed.bgz +# test_tbi Test Encode Motifs (hg19) TBI //tool-data/motifs/pouya_test_motifs.bed.bgz.tbi