diff data_manager/data_manager_fetch_motifs.py @ 8:b4c2836d2e4e draft

Uploaded
author jeremyjliu
date Fri, 30 Jan 2015 13:47:29 -0500
parents aa0d1b185070
children e4229c66d78d
line wrap: on
line diff
--- a/data_manager/data_manager_fetch_motifs.py	Thu Jan 29 00:00:12 2015 -0500
+++ b/data_manager/data_manager_fetch_motifs.py	Fri Jan 30 13:47:29 2015 -0500
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
-#Dan Blankenberg
+# Jeremy Liu
+# February 2015
+# Adapted from Dan Blankenberg's sample data manager
 
 import sys
 import os
@@ -20,32 +22,35 @@
 
 def download_motif_databases( data_manager_dict, params, target_directory, motif_db ):
 
+    # Select download URL, file name, data table name, and path using motif_db selector variable
     if motif_db == "pouya":
-        BGZ = ['COMPBIO URL HERE',
+        BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs_bed.bgz',
                 "pouya_motifs.bed.bgz", "pouya_bgz", "Pouya Encode Motifs (hg19) BGZ"]
-        TBI = ['COMPBIO URL HERE',
+        TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_motifs_bed.bgz.tbi',
                 "pouya_motifs.bed.bgz.tbi", "pouya_tbi", "Pouya Encode Motifs (hg19) TBI"]
     elif motif_db == "jaspar":
-        BGZ = ['COMPBIO URL HERE',
-               "jaspar_jolma_motifs.bed.bgz", "jaspar_bgz", "Jaspar and Jolma Motifs (hg19) BGZ"]
-        TBI = ['COMPBIO URL HERE',
+        BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz',
+                "jaspar_jolma_motifs.bed.bgz", "jaspar_bgz", "Jaspar and Jolma Motifs (hg19) BGZ"]
+        TBI = ['http://compbio.med.harvard.edu/motif-enrichment/jaspar_jolma_motifs.bed.bgz.tbi',
                 "jaspar_jolma_motifs.bed.bgz.tbi", "jaspar_tbi", "Jaspar and Jolma Motifs (hg19) TBI"]
     elif motif_db == "mouse":
-        BGZ = ['COMPBIO URL HERE',
-                "mouse_motifs.bed.bgz", "mouse_bgz", "Mouse Motifs (mm9) BGZ"]
-        TBI = ['COMPBIO URL HERE',
-                "mouse_motifs.bed.bgz.tbi", "mouse_tbi", "Mouse Motifs (mm9) TBI"]
+        BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz',
+                "mm9_motifs_split.bed.bgz", "mouse_bgz", "Mouse Motifs (mm9) BGZ"]
+        TBI = ['http://compbio.med.harvard.edu/motif-enrichment/mm9_motifs_split.bed.bgz.tbi',
+                "mm9_motifs_split.bed.bgz.tbi", "mouse_tbi", "Mouse Motifs (mm9) TBI"]
     else:
-        BGZ = ['http://gehlenborg.com/wp-content/uploads/motif/pouya_test_motifs.bed.bgz', 
+        BGZ = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz', 
                "pouya_test_motifs.bed.bgz", "test_bgz", "Test Pouya Subset (hg19) BGZ"]
-        TBI = ['http://gehlenborg.com/wp-content/uploads/motif/pouya_test_motifs.bed.bgz.tbi',
+        TBI = ['http://compbio.med.harvard.edu/motif-enrichment/pouya_test_motifs.bed.bgz.tbi',
                "pouya_test_motifs.bed.bgz.tbi", "test_tbi", "Test Pouya Subset (hg19) TBI"]
 
+    # Save and add motif bgz file to motif_databases data table
     bgz_reader = urllib2.urlopen( BGZ[0] )
     bgz_data_table_entry = _stream_fasta_to_file( bgz_reader, target_directory, params,
                             BGZ[1], BGZ[2], BGZ[3] )
     _add_data_table_entry( data_manager_dict, 'motif_databases', bgz_data_table_entry )
 
+    # Save and add motif tbi file to motif_databases data table
     tbi_reader = urllib2.urlopen( TBI[0] )
     tbi_data_table_entry = _stream_fasta_to_file( tbi_reader, target_directory, params,
                             TBI[1], TBI[2], TBI[3] )