comparison region_motif_compare.r @ 37:76e108a19e64 draft

Uploaded
author jeremyjliu
date Tue, 07 Apr 2015 23:22:43 -0400
parents b868314320e2
children
comparison
equal deleted inserted replaced
36:b868314320e2 37:76e108a19e64
3 # motifs (or any location based feature) based on poisson tests and gc 3 # motifs (or any location based feature) based on poisson tests and gc
4 # corrections. All enrichment ratios relative to overall count / gc ratios. 4 # corrections. All enrichment ratios relative to overall count / gc ratios.
5 # Author: Jeremy liu 5 # Author: Jeremy liu
6 # Email: jeremy.liu@yale.edu 6 # Email: jeremy.liu@yale.edu
7 # Date: 15/02/11 7 # Date: 15/02/11
8 # Note: This script is meant to be invoked with the following command 8 # Note: This script can be invoked with the following command
9 # R --slave --vanilla -f ./region_motif_compare.r --args <workingdir> <pwm_file> 9 # R --slave --vanilla -f ./region_motif_compare.r --args <workingdir> <pwm_file>
10 # <intab1> <intab2> <enriched_tab> <depleted_tab> <plots_png> 10 # <intab1> <intab2> <enriched_tab> <depleted_tab> <plots_png>
11 # <workingdir> is working directory of galaxy installation 11 # <workingdir> is the directory where plotting.r is saved
12 # Dependencies: region_motif_data_manager, plotting.r 12 # Dependencies: region_motif_data_manager, plotting.r,
13 13
14 # Auxiliary function to concatenate multiple strings 14 # Auxiliary function to concatenate multiple strings
15 concat <- function(...) { 15 concat <- function(...) {
16 input_list <- list(...) 16 input_list <- list(...)
17 return(paste(input_list, sep="", collapse="")) 17 return(paste(input_list, sep="", collapse=""))
44 } 44 }
45 45
46 startTime = Sys.time() 46 startTime = Sys.time()
47 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n") 47 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n")
48 48
49 # Loading motif position weight matrix (pwm) file and input tab file 49 # Loading motif position weight matrix (pwm) file
50 cat("Loading motif postion weight matrices...\n")
51 lines = scan(pwmFile, what="character", sep="\n", quiet=TRUE)
52 indices = which(grepl("MOTIF", lines))
53 names(indices) = lapply(indices, function(i) {
54 nameline = lines[i]
55 name = substr(nameline, 7, nchar(nameline))
56 })
57
58 pwms = sapply(indices, function(i) {
59 infoline = unlist(strsplit(lines[i+1], " "))
60 alength = as.numeric(infoline[4])
61 width = as.numeric(infoline[6])
62 subset = lines[(i+2):(i+2+width-1)]
63 motiflines = strsplit(subset, " ")
64 motif = t(do.call(rbind, motiflines))
65 motif = apply(motif, 2, as.numeric)
66 }, simplify=FALSE, USE.NAMES=TRUE)
67
68 # Loading input tab files
50 cat("Loading and reading input region motif count files...\n") 69 cat("Loading and reading input region motif count files...\n")
51 load(pwmFile) # pwms data structure
52 #if (dbCode == "c") { # Remaining implementation of dbCode "c" combined
53 # temp = pwms
54 # load(pwmFile2)
55 # pwms = append(temp, pwms)
56 #}
57 region1DF = read_tsv(inTab1) 70 region1DF = read_tsv(inTab1)
58 region2DF = read_tsv(inTab2) 71 region2DF = read_tsv(inTab2)
59 region1Counts = region1DF$counts 72 region1Counts = region1DF$counts
60 region2Counts = region2DF$counts 73 region2Counts = region2DF$counts
61 names(region1Counts) = region1DF$motif 74 names(region1Counts) = region1DF$motif