changeset 37:76e108a19e64 draft

Uploaded
author jeremyjliu
date Tue, 07 Apr 2015 23:22:43 -0400
parents b868314320e2
children aa921c733d10
files region_motif_compare.r region_motif_intersect.r repository_dependencies.xml
diffstat 3 files changed, 27 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/region_motif_compare.r	Fri Mar 06 13:24:03 2015 -0500
+++ b/region_motif_compare.r	Tue Apr 07 23:22:43 2015 -0400
@@ -5,11 +5,11 @@
 # Author: Jeremy liu
 # Email: jeremy.liu@yale.edu
 # Date: 15/02/11
-# Note: This script is meant to be invoked with the following command
+# Note: This script can be invoked with the following command
 # R --slave --vanilla -f ./region_motif_compare.r --args <workingdir> <pwm_file> 
 #        <intab1> <intab2> <enriched_tab> <depleted_tab> <plots_png>
-# <workingdir> is working directory of galaxy installation
-# Dependencies: region_motif_data_manager, plotting.r
+# <workingdir> is the directory where plotting.r is saved
+# Dependencies: region_motif_data_manager, plotting.r, 
 
 # Auxiliary function to concatenate multiple strings
 concat <- function(...) {
@@ -46,14 +46,27 @@
 startTime = Sys.time()
 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n")
 
-# Loading motif position weight matrix (pwm) file and input tab file
+# Loading motif position weight matrix (pwm) file 
+cat("Loading motif postion weight matrices...\n")
+lines = scan(pwmFile, what="character", sep="\n", quiet=TRUE)
+indices = which(grepl("MOTIF", lines))
+names(indices) = lapply(indices, function(i) {
+    nameline = lines[i]
+    name = substr(nameline, 7, nchar(nameline))
+    })
+
+pwms = sapply(indices, function(i) {
+    infoline = unlist(strsplit(lines[i+1], " "))
+    alength = as.numeric(infoline[4])
+    width = as.numeric(infoline[6])
+    subset = lines[(i+2):(i+2+width-1)]
+    motiflines = strsplit(subset, " ")
+    motif = t(do.call(rbind, motiflines))
+    motif = apply(motif, 2, as.numeric)
+    }, simplify=FALSE, USE.NAMES=TRUE)
+
+# Loading input tab files
 cat("Loading and reading input region motif count files...\n")
-load(pwmFile) # pwms data structure
-#if (dbCode == "c") { # Remaining implementation of dbCode "c" combined 
-#	temp = pwms
-#	load(pwmFile2)
-#	pwms = append(temp, pwms)
-#}
 region1DF = read_tsv(inTab1)
 region2DF = read_tsv(inTab2)
 region1Counts = region1DF$counts
--- a/region_motif_intersect.r	Fri Mar 06 13:24:03 2015 -0500
+++ b/region_motif_intersect.r	Tue Apr 07 23:22:43 2015 -0400
@@ -4,9 +4,9 @@
 # Author: Jeremy Liu
 # Email: jeremy.liu@yale.edu
 # Date: 15/02/11
-# Note: This script is meant to be invoked with the following command
+# Note: This script can be invoked with the following command
 # R --slave --vanilla -f ./region_motif_intersect.r --args <db_bgz> <db_tbi> <inbed> <outtab>
-# Dependencies: region_motif_data_manager
+# Dependencies: region_motif_data_manager, Rsamtools
 
 # Auxiliary function to concatenate multiple strings
 concat <- function(...) {
@@ -33,7 +33,7 @@
 
 # Load dependencies
 cat("Loading dependencies...\n")
-suppressPackageStartupMessages(library(Rsamtools, quietly=TRUE))
+suppressPackageStartupMessages(library(Rsamtools, quietly=TRUE)) # NEED TO HANDLE INSTALLATION
 
 # Initializing hash table (as env) with motif names and loading tabix file
 cat("Loading motif database and initializing hash table...\n")
--- a/repository_dependencies.xml	Fri Mar 06 13:24:03 2015 -0500
+++ b/repository_dependencies.xml	Tue Apr 07 23:22:43 2015 -0400
@@ -1,4 +1,4 @@
 <?xml version="1.0"?>
 <repositories description="Region_Motif_Enrichment requires Region_Motif_Data_Manager motif databases">
-     <repository changeset_revision="a5421f83f972" name="region_motif_data_manager" owner="jeremyjliu" toolshed="http://testtoolshed.g2.bx.psu.edu" />
+     <repository changeset_revision="24a2c2783fb8" name="region_motif_data_manager" owner="jeremyjliu" toolshed="http://testtoolshed.g2.bx.psu.edu" />
 </repositories>