Mercurial > repos > jeremyjliu > region_motif_enrichment
comparison region_motif_compare.r @ 37:76e108a19e64 draft
Uploaded
author | jeremyjliu |
---|---|
date | Tue, 07 Apr 2015 23:22:43 -0400 |
parents | b868314320e2 |
children |
comparison
equal
deleted
inserted
replaced
36:b868314320e2 | 37:76e108a19e64 |
---|---|
3 # motifs (or any location based feature) based on poisson tests and gc | 3 # motifs (or any location based feature) based on poisson tests and gc |
4 # corrections. All enrichment ratios relative to overall count / gc ratios. | 4 # corrections. All enrichment ratios relative to overall count / gc ratios. |
5 # Author: Jeremy liu | 5 # Author: Jeremy liu |
6 # Email: jeremy.liu@yale.edu | 6 # Email: jeremy.liu@yale.edu |
7 # Date: 15/02/11 | 7 # Date: 15/02/11 |
8 # Note: This script is meant to be invoked with the following command | 8 # Note: This script can be invoked with the following command |
9 # R --slave --vanilla -f ./region_motif_compare.r --args <workingdir> <pwm_file> | 9 # R --slave --vanilla -f ./region_motif_compare.r --args <workingdir> <pwm_file> |
10 # <intab1> <intab2> <enriched_tab> <depleted_tab> <plots_png> | 10 # <intab1> <intab2> <enriched_tab> <depleted_tab> <plots_png> |
11 # <workingdir> is working directory of galaxy installation | 11 # <workingdir> is the directory where plotting.r is saved |
12 # Dependencies: region_motif_data_manager, plotting.r | 12 # Dependencies: region_motif_data_manager, plotting.r, |
13 | 13 |
14 # Auxiliary function to concatenate multiple strings | 14 # Auxiliary function to concatenate multiple strings |
15 concat <- function(...) { | 15 concat <- function(...) { |
16 input_list <- list(...) | 16 input_list <- list(...) |
17 return(paste(input_list, sep="", collapse="")) | 17 return(paste(input_list, sep="", collapse="")) |
44 } | 44 } |
45 | 45 |
46 startTime = Sys.time() | 46 startTime = Sys.time() |
47 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n") | 47 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n") |
48 | 48 |
49 # Loading motif position weight matrix (pwm) file and input tab file | 49 # Loading motif position weight matrix (pwm) file |
50 cat("Loading motif postion weight matrices...\n") | |
51 lines = scan(pwmFile, what="character", sep="\n", quiet=TRUE) | |
52 indices = which(grepl("MOTIF", lines)) | |
53 names(indices) = lapply(indices, function(i) { | |
54 nameline = lines[i] | |
55 name = substr(nameline, 7, nchar(nameline)) | |
56 }) | |
57 | |
58 pwms = sapply(indices, function(i) { | |
59 infoline = unlist(strsplit(lines[i+1], " ")) | |
60 alength = as.numeric(infoline[4]) | |
61 width = as.numeric(infoline[6]) | |
62 subset = lines[(i+2):(i+2+width-1)] | |
63 motiflines = strsplit(subset, " ") | |
64 motif = t(do.call(rbind, motiflines)) | |
65 motif = apply(motif, 2, as.numeric) | |
66 }, simplify=FALSE, USE.NAMES=TRUE) | |
67 | |
68 # Loading input tab files | |
50 cat("Loading and reading input region motif count files...\n") | 69 cat("Loading and reading input region motif count files...\n") |
51 load(pwmFile) # pwms data structure | |
52 #if (dbCode == "c") { # Remaining implementation of dbCode "c" combined | |
53 # temp = pwms | |
54 # load(pwmFile2) | |
55 # pwms = append(temp, pwms) | |
56 #} | |
57 region1DF = read_tsv(inTab1) | 70 region1DF = read_tsv(inTab1) |
58 region2DF = read_tsv(inTab2) | 71 region2DF = read_tsv(inTab2) |
59 region1Counts = region1DF$counts | 72 region1Counts = region1DF$counts |
60 region2Counts = region2DF$counts | 73 region2Counts = region2DF$counts |
61 names(region1Counts) = region1DF$motif | 74 names(region1Counts) = region1DF$motif |