comparison data_manager/data_manager_fetch_motifs.py @ 8:b4c2836d2e4e draft

Uploaded
author jeremyjliu
date Fri, 30 Jan 2015 13:47:29 -0500
parents aa0d1b185070
children e4229c66d78d
comparison
equal deleted inserted replaced
7:2c918b3a7e8c 8:b4c2836d2e4e
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 #Dan Blankenberg 2 # Jeremy Liu
3 # February 2015
4 # Adapted from Dan Blankenberg's sample data manager
3 5
4 import sys 6 import sys
5 import os 7 import os
6 import tempfile 8 import tempfile
7 import shutil 9 import shutil
18 20
19 CHUNK_SIZE = 2**20 #1mb 21 CHUNK_SIZE = 2**20 #1mb
20 22
21 def download_motif_databases( data_manager_dict, params, target_directory, motif_db ): 23 def download_motif_databases( data_manager_dict, params, target_directory, motif_db ):
22 24
25 # Select download URL, file name, data table name, and path using motif_db selector variable
23 if motif_db == "pouya": 26 if motif_db == "pouya":
24 BGZ = ['COMPBIO URL HERE', 27 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs_bed.bgz',
25 "pouya_motifs.bed.bgz", "pouya_bgz", "Pouya Encode Motifs (hg19) BGZ"] 28 "pouya_motifs.bed.bgz", "pouya_bgz", "Pouya Encode Motifs (hg19) BGZ"]
26 TBI = ['COMPBIO URL HERE', 29 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs_bed.bgz.tbi',
27 "pouya_motifs.bed.bgz.tbi", "pouya_tbi", "Pouya Encode Motifs (hg19) TBI"] 30 "pouya_motifs.bed.bgz.tbi", "pouya_tbi", "Pouya Encode Motifs (hg19) TBI"]
28 elif motif_db == "jaspar": 31 elif motif_db == "jaspar":
29 BGZ = ['COMPBIO URL HERE', 32 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz',
30 "jaspar_jolma_motifs.bed.bgz", "jaspar_bgz", "Jaspar and Jolma Motifs (hg19) BGZ"] 33 "jaspar_jolma_motifs.bed.bgz", "jaspar_bgz", "Jaspar and Jolma Motifs (hg19) BGZ"]
31 TBI = ['COMPBIO URL HERE', 34 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz.tbi',
32 "jaspar_jolma_motifs.bed.bgz.tbi", "jaspar_tbi", "Jaspar and Jolma Motifs (hg19) TBI"] 35 "jaspar_jolma_motifs.bed.bgz.tbi", "jaspar_tbi", "Jaspar and Jolma Motifs (hg19) TBI"]
33 elif motif_db == "mouse": 36 elif motif_db == "mouse":
34 BGZ = ['COMPBIO URL HERE', 37 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz',
35 "mouse_motifs.bed.bgz", "mouse_bgz", "Mouse Motifs (mm9) BGZ"] 38 "mm9_motifs_split.bed.bgz", "mouse_bgz", "Mouse Motifs (mm9) BGZ"]
36 TBI = ['COMPBIO URL HERE', 39 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz.tbi',
37 "mouse_motifs.bed.bgz.tbi", "mouse_tbi", "Mouse Motifs (mm9) TBI"] 40 "mm9_motifs_split.bed.bgz.tbi", "mouse_tbi", "Mouse Motifs (mm9) TBI"]
38 else: 41 else:
39 BGZ = ['http://gehlenborg.com/wp-content/uploads/motif/pouya_test_motifs.bed.bgz', 42 BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz',
40 "pouya_test_motifs.bed.bgz", "test_bgz", "Test Pouya Subset (hg19) BGZ"] 43 "pouya_test_motifs.bed.bgz", "test_bgz", "Test Pouya Subset (hg19) BGZ"]
41 TBI = ['http://gehlenborg.com/wp-content/uploads/motif/pouya_test_motifs.bed.bgz.tbi', 44 TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz.tbi',
42 "pouya_test_motifs.bed.bgz.tbi", "test_tbi", "Test Pouya Subset (hg19) TBI"] 45 "pouya_test_motifs.bed.bgz.tbi", "test_tbi", "Test Pouya Subset (hg19) TBI"]
43 46
47 # Save and add motif bgz file to motif_databases data table
44 bgz_reader = urllib2.urlopen( BGZ[0] ) 48 bgz_reader = urllib2.urlopen( BGZ[0] )
45 bgz_data_table_entry = _stream_fasta_to_file( bgz_reader, target_directory, params, 49 bgz_data_table_entry = _stream_fasta_to_file( bgz_reader, target_directory, params,
46 BGZ[1], BGZ[2], BGZ[3] ) 50 BGZ[1], BGZ[2], BGZ[3] )
47 _add_data_table_entry( data_manager_dict, 'motif_databases', bgz_data_table_entry ) 51 _add_data_table_entry( data_manager_dict, 'motif_databases', bgz_data_table_entry )
48 52
53 # Save and add motif tbi file to motif_databases data table
49 tbi_reader = urllib2.urlopen( TBI[0] ) 54 tbi_reader = urllib2.urlopen( TBI[0] )
50 tbi_data_table_entry = _stream_fasta_to_file( tbi_reader, target_directory, params, 55 tbi_data_table_entry = _stream_fasta_to_file( tbi_reader, target_directory, params,
51 TBI[1], TBI[2], TBI[3] ) 56 TBI[1], TBI[2], TBI[3] )
52 _add_data_table_entry( data_manager_dict, 'motif_databases', tbi_data_table_entry ) 57 _add_data_table_entry( data_manager_dict, 'motif_databases', tbi_data_table_entry )
53 58