Mercurial > repos > jeremyjliu > region_motif_enrichment
annotate upload/region_motif_compare.r @ 16:9a84f76db861 draft
Fixed RMOTIF_PATH env variable for toolshed deployment
author | jeremyjliu |
---|---|
date | Wed, 12 Nov 2014 15:19:37 -0500 |
parents | |
children |
rev | line source |
---|---|
16
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
1 # Name: region_motif_compare.r |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
2 # Description: Reads in two count files and determines enriched and depleted |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
3 # motifs (or any location based feature) based on poisson tests and gc |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
4 # corrections. All enrichment ratios relative to overall count / gc ratios. |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
5 # Author: Jeremy liu |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
6 # Email: jeremy.liu@yale.edu |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
7 # Date: 14/07/03 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
8 # Note: This script is meant to be invoked with the following command |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
9 # R --slave --vanilla -f ./region_motif_compare.r --args <workingdir> <db> <intab1> <intab2> |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
10 # <enriched_tab> <depleted_tab> <plots_png> |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
11 # <workingdir> is working directory of galaxy installation |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
12 # <db> types: "t" test, "p" pouya, "j" jaspar jolma, "m" mouse, "c" combined |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
13 # Dependencies: plotting.r |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
14 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
15 # Auxiliary function to concatenate multiple strings |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
16 concat <- function(...) { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
17 input_list <- list(...) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
18 return(paste(input_list, sep="", collapse="")) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
19 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
20 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
21 # Supress all warning messages to prevent Galaxy treating warnings as errors |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
22 options(warn=-1) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
23 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
24 # Set common and data directories |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
25 args <- commandArgs() |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
26 workingDir = args[7] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
27 dbDir = concat(workingDir, "/region_motif_db") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
28 dbCode = args[8] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
29 # dbCode "c" implemented when pwmFile is loaded |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
30 if (dbCode == "t" | dbCode == "p") { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
31 pwmFile = concat(dbDir, "/pwms/pouya.pwms.from.seq.RData") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
32 } else if (dbCode == "j") { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
33 pwmFile = concat(dbDir, "/pwms/jaspar.jolma.pwms.from.seq.RData") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
34 } else if (dbCode == "m") { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
35 pwmFile = concat(dbDir, "/pwms/mm9.pwms.from.seq.RData") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
36 } else if (dbCode == "c") { # rest of dbCode "c" implemeted when pwmFile loaded |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
37 pwmFile = concat(dbDir, "/pwms/pouya.pwms.from.seq.RData") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
38 pwmFile2 = concat(dbDir, "/pwms/jaspar.jolma.pwms.from.seq.RData") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
39 } else { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
40 pwmFile = concat(dbDir, "/pwms/pouya.pwms.from.seq.RData") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
41 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
42 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
43 # Set input and reference files |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
44 inTab1 = args[9] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
45 inTab2 = args[10] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
46 enrichTab = args[11] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
47 depleteTab = args[12] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
48 plotsPng = args[13] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
49 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
50 # Load dependencies |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
51 source(concat(workingDir, "/region_motif_lib/plotting.r")) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
52 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
53 # Auxiliary function to read in tab file and prepare the data |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
54 read_tsv <- function(file) { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
55 data = read.table(file, sep="\t", stringsAsFactors=FALSE) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
56 names(data)[names(data) == "V1"] = "motif" |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
57 names(data)[names(data) == "V2"] = "counts" |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
58 return(data) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
59 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
60 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
61 startTime = Sys.time() |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
62 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
63 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
64 # Loading motif position weight matrix (pwm) file and input tab file |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
65 cat("Loading and reading input region motif count files...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
66 load(pwmFile) # pwms data structure |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
67 if (dbCode == "c") { # Remaining implementation of dbCode "c" combined |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
68 temp = pwms |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
69 load(pwmFile2) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
70 pwms = append(temp, pwms) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
71 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
72 region1DF = read_tsv(inTab1) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
73 region2DF = read_tsv(inTab2) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
74 region1Counts = region1DF$counts |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
75 region2Counts = region2DF$counts |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
76 names(region1Counts) = region1DF$motif |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
77 names(region2Counts) = region2DF$motif |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
78 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
79 # Processing count vectors to account for missing 0 count motifs, then sorting |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
80 cat("Performing 0 count correction and sorting...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
81 allNames = union(names(region1Counts), names(region2Counts)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
82 region1Diff = setdiff(allNames, names(region1Counts)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
83 region2Diff = setdiff(allNames, names(region2Counts)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
84 addCounts1 = rep(0, length(region1Diff)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
85 addCounts2 = rep(0, length(region2Diff)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
86 names(addCounts1) = region1Diff |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
87 names(addCounts2) = region2Diff |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
88 newCounts1 = append(region1Counts, addCounts1) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
89 newCounts2 = append(region2Counts, addCounts2) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
90 region1Counts = newCounts1[sort.int(names(newCounts1), index.return=TRUE)$ix] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
91 region2Counts = newCounts2[sort.int(names(newCounts2), index.return=TRUE)$ix] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
92 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
93 # Generate gc content matrix |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
94 gc = sapply(pwms, function(i) mean(i[2:3,3:18])) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
95 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
96 # Apply poisson test, calculate p and q values, and filter significant results |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
97 cat("Applying poisson test...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
98 rValue = sum(region2Counts) / sum(region1Counts) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
99 pValue = sapply(seq(along=region1Counts), function(i) { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
100 poisson.test(c(region1Counts[i], region2Counts[i]), r=1/rValue)$p.value |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
101 }) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
102 qValue = p.adjust(pValue, "fdr") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
103 indices = which(qValue<0.1 & abs(log2(region1Counts/region2Counts/rValue))>log2(1.5)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
104 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
105 # Setting up output diagnostic plots, 4 in 1 png image |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
106 png(plotsPng, width=800, height=800) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
107 xlab = "region1_count" |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
108 ylab = "region2_count" |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
109 lim = c(0.5, 5000) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
110 layout(matrix(1:4, ncol=2)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
111 par(mar=c(5, 5, 5, 1)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
112 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
113 # Plot all motif counts along the linear correlation coefficient |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
114 plot.scatter(region1Counts+0.5, region2Counts+0.5, log="xy", xlab=xlab, ylab=ylab, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
115 cex.lab=2.2, cex.axis=1.8, xlim=lim, ylim=lim*rValue) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
116 abline(0, rValue, untf=T) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
117 abline(0, rValue*2, untf=T, lty=2) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
118 abline(0, rValue/2, untf=T, lty=2) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
119 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
120 # Plot enriched and depleted motifs in red, housed in second plot |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
121 plot.scatter(region1Counts+0.5, region2Counts+0.5, log="xy", xlab=xlab, ylab=ylab, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
122 cex.lab=2.2, cex.axis=1.8, xlim=lim, ylim=lim*rValue) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
123 points(region1Counts[indices]+0.5, region2Counts[indices]+0.5, col="red") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
124 abline(0, rValue, untf=T) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
125 abline(0, rValue*2, untf=T, lty=2) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
126 abline(0, rValue/2, untf=T, lty=2) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
127 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
128 # Apply and plot gc correction and loess curve |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
129 cat("Applying gc correction, rerunning poisson test...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
130 ind = which(region1Counts>5) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
131 gc = gc[names(region2Counts)] # Reorder the indices of pwms to match input data |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
132 lo = plot.scatter(gc,log2(region2Counts/region1Counts),draw.loess=T, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
133 xlab="gc content of motif",ylab=paste("log2(",ylab,"/",xlab,")"), |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
134 cex.lab=2.2,cex.axis=1.8,ind=ind) # This function is in plotting.r |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
135 gcCorrection = 2^approx(lo$loess,xout=gc,rule=2)$y |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
136 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
137 # Recalculate p and q values, and filter for significant entries |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
138 pValueGC = sapply(seq(along=region1Counts),function(i) { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
139 poisson.test(c(region1Counts[i],region2Counts[i]),r=1/gcCorrection[i])$p.value |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
140 }) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
141 qValueGC=p.adjust(pValueGC,"fdr") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
142 indicesGC = which(qValueGC<0.1 & abs(log2(region1Counts/region2Counts*gcCorrection))>log2(1.5)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
143 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
144 # Plot gc corrected motif counts |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
145 plot.scatter(region1Counts+0.5, (region2Counts+0.5)/gcCorrection, log="xy", |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
146 xlab=xlab, ylab=paste(ylab,"(normalized)"), cex.lab=2.2, cex.axis=1.8, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
147 xlim=lim, ylim=lim) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
148 points(region1Counts[indicesGC]+0.5, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
149 (region2Counts[indicesGC]+0.5)/gcCorrection[indicesGC], col="red") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
150 abline(0,1) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
151 abline(0,1*2,untf=T,lty=2) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
152 abline(0,1/2,untf=T,lty=2) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
153 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
154 # Trim results, compile statistics and output to file |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
155 # Only does so if significant results are computed |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
156 if(length(indicesGC) > 0) { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
157 # Calculate expected counts and enrichment ratios |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
158 cat("Calculating statistics...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
159 nullExpect = region1Counts * gcCorrection |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
160 enrichment = region2Counts / nullExpect |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
161 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
162 # Reorder selected indices in ascending pvalue |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
163 cat("Reordering by ascending pvalue...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
164 indicesReorder = indicesGC[order(pValueGC[indicesGC])] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
165 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
166 # Combine data into one data frame and output to two files |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
167 cat("Splitting and outputting data...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
168 outDF = data.frame(motif=names(pValueGC), p=as.numeric(pValueGC), q=qValueGC, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
169 stringsAsFactors=F, region_1_count=region1Counts, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
170 null_expectation=round(nullExpect,2), region_2_count=region2Counts, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
171 enrichment=enrichment)[indicesReorder,] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
172 names(outDF)[which(names(outDF)=="region_1_count")]=xlab |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
173 names(outDF)[which(names(outDF)=="region_2_count")]=ylab |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
174 indicesEnrich = which(outDF$enrichment>1) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
175 indicesDeplete = which(outDF$enrichment<1) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
176 outDF$enrichment = ifelse(outDF$enrichment>1, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
177 round(outDF$enrichment,3), |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
178 paste("1/",round(1/outDF$enrichment,3))) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
179 write.table(outDF[indicesEnrich,], file=enrichTab, quote=FALSE, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
180 sep="\t", append=FALSE, row.names=FALSE, col.names=TRUE) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
181 write.table(outDF[indicesDeplete,], file=depleteTab, quote=FALSE, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
182 sep="\t", append=FALSE, row.names=FALSE, col.names=TRUE) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
183 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
184 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
185 # Catch display messages and output timing information |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
186 catchMessage = dev.off() |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
187 cat("Done. Job started at:", format(startTime, "%a %b %d %X %Y."), |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
188 "Job ended at:", format(Sys.time(), "%a %b %d %X %Y."), "\n") |