Mercurial > repos > jeremyjliu > region_motif_enrichment
annotate upload/region_motif_intersect.r @ 16:9a84f76db861 draft
Fixed RMOTIF_PATH env variable for toolshed deployment
author | jeremyjliu |
---|---|
date | Wed, 12 Nov 2014 15:19:37 -0500 |
parents | |
children |
rev | line source |
---|---|
16
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
1 # Name: region_motif_intersect.r |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
2 # Description: Takes a bed file of target regions and counts intersections |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
3 # of each motif (built in rdata database) and target regions. |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
4 # Author: Jeremy liu |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
5 # Email: jeremy.liu@yale.edu |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
6 # Date: 14/07/02 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
7 # Note: This script is meant to be invoked with the following command |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
8 # R --slave --vanilla -f ./region_motif_intersect.r --args <workingdir> <db> <inbed> <outtab> |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
9 # <workingdir> is working directory of galaxy installation |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
10 # <db> types: "t" test, "p" pouya, "j" jaspar jolma, "m" mouse |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
11 # Dependencies: none |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
12 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
13 # Auxiliary function to concatenate multiple strings |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
14 concat <- function(...) { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
15 input_list <- list(...) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
16 return(paste(input_list, sep="", collapse="")) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
17 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
18 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
19 # Set common and data directories |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
20 args <- commandArgs() |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
21 workingDir = args[7] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
22 dbDir = concat(workingDir, "/region_motif_db") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
23 dbCode = args[8] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
24 if (dbCode == "t") { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
25 motifDB = concat(dbDir, "/pouya_test_motifs.bed.bgz") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
26 } else if (dbCode == "p") { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
27 motifDB = concat(dbDir, "/pouya_motifs.bed.bgz") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
28 } else if (dbCode == "j") { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
29 motifDB = concat(dbDir, "/jaspar_jolma_motifs.bed.bgz") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
30 } else if (dbCode == "m") { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
31 motifDB = concat(dbDir, "/mm9_motifs.bed.bgz") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
32 } else { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
33 motifDB = concat(dbDir, "/pouya_motifs.bed.bgz") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
34 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
35 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
36 # Set input and reference files, comment to toggle commmand line arguments |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
37 inBed = args[9] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
38 outTab = args[10] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
39 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
40 # Auxiliary function to read in BED file |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
41 read_bed <- function(file) { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
42 return(read.table(file, sep="\t", stringsAsFactors=FALSE)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
43 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
44 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
45 startTime = Sys.time() |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
46 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
47 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
48 # Load dependencies |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
49 cat("Loading dependencies...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
50 suppressPackageStartupMessages(library(Rsamtools, quietly=TRUE)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
51 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
52 # Initializing hash table (as env) with motif names and loading tabix file |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
53 cat("Loading motif database and initializing hash table...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
54 motifTable = new.env() |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
55 motifTbx <- TabixFile(motifDB) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
56 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
57 # Loading input bed file, convert integer columns to numeric, name columns |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
58 cat("Loading region file...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
59 regionsDF = read_bed(inBed) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
60 dfTemp = sapply(regionsDF, is.integer) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
61 regionsDF[dfTemp] = lapply(regionsDF[dfTemp], as.numeric) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
62 names(regionsDF)[names(regionsDF) == "V1"] = "chr" |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
63 names(regionsDF)[names(regionsDF) == "V2"] = "start" |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
64 names(regionsDF)[names(regionsDF) == "V3"] = "end" |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
65 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
66 # Filtering regions to exclude chromosomes not in motif database |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
67 cat("Determining intersection counts...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
68 motifTbxChrs = seqnamesTabix(motifTbx) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
69 regionsDFFilter = subset(regionsDF, chr %in% motifTbxChrs) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
70 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
71 # Loading regions into GRanges object and scanning motif tabix database |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
72 # Region end is incremented by 1 since scanTabix querying is inclusive for |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
73 # position start but exclusive for position end. |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
74 param = GRanges(regionsDFFilter$chr, IRanges(regionsDFFilter$start, |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
75 end=regionsDFFilter$end + 1)) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
76 regionsIntersects = scanTabix(motifTbx, param=param) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
77 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
78 # Parsing result list and updating motif count hash table |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
79 cat("Parsing result list...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
80 for(regionIntersects in regionsIntersects) { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
81 for(regionIntersect in strsplit(regionIntersects, " ")) { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
82 intersectMotif = strsplit(regionIntersect, "\t")[[1]][4] |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
83 if(is.null(motifTable[[intersectMotif]])) { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
84 motifTable[[intersectMotif]] = 1 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
85 } else { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
86 motifTable[[intersectMotif]] = motifTable[[intersectMotif]] + 1 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
87 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
88 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
89 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
90 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
91 # Converting motif count hash table to an integer vector for output |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
92 counts = integer(length = length(ls(motifTable))) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
93 names(counts) = ls(motifTable) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
94 for(motifName in ls(motifTable)) { |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
95 counts[motifName] = as.integer(motifTable[[motifName]]) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
96 } |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
97 |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
98 # Outputting intersection counts to tab delineated file |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
99 cat("Outputting to file...\n") |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
100 write.table(counts, outTab, quote=FALSE, sep="\t", row.names=TRUE, col.names=FALSE) |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
101 cat("Done. Job started at:", format(startTime, "%a %b %d %X %Y."), |
9a84f76db861
Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff
changeset
|
102 "Job ended at:", format(Sys.time(), "%a %b %d %X %Y."), "\n") |