annotate upload/region_motif_intersect.r @ 16:9a84f76db861 draft

Fixed RMOTIF_PATH env variable for toolshed deployment
author jeremyjliu
date Wed, 12 Nov 2014 15:19:37 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
1 # Name: region_motif_intersect.r
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
2 # Description: Takes a bed file of target regions and counts intersections
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
3 # of each motif (built in rdata database) and target regions.
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
4 # Author: Jeremy liu
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
5 # Email: jeremy.liu@yale.edu
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
6 # Date: 14/07/02
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
7 # Note: This script is meant to be invoked with the following command
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
8 # R --slave --vanilla -f ./region_motif_intersect.r --args <workingdir> <db> <inbed> <outtab>
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
9 # <workingdir> is working directory of galaxy installation
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
10 # <db> types: "t" test, "p" pouya, "j" jaspar jolma, "m" mouse
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
11 # Dependencies: none
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
12
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
13 # Auxiliary function to concatenate multiple strings
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
14 concat <- function(...) {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
15 input_list <- list(...)
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
16 return(paste(input_list, sep="", collapse=""))
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
17 }
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
18
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
19 # Set common and data directories
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
20 args <- commandArgs()
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
21 workingDir = args[7]
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
22 dbDir = concat(workingDir, "/region_motif_db")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
23 dbCode = args[8]
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
24 if (dbCode == "t") {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
25 motifDB = concat(dbDir, "/pouya_test_motifs.bed.bgz")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
26 } else if (dbCode == "p") {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
27 motifDB = concat(dbDir, "/pouya_motifs.bed.bgz")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
28 } else if (dbCode == "j") {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
29 motifDB = concat(dbDir, "/jaspar_jolma_motifs.bed.bgz")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
30 } else if (dbCode == "m") {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
31 motifDB = concat(dbDir, "/mm9_motifs.bed.bgz")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
32 } else {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
33 motifDB = concat(dbDir, "/pouya_motifs.bed.bgz")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
34 }
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
35
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
36 # Set input and reference files, comment to toggle commmand line arguments
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
37 inBed = args[9]
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
38 outTab = args[10]
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
39
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
40 # Auxiliary function to read in BED file
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
41 read_bed <- function(file) {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
42 return(read.table(file, sep="\t", stringsAsFactors=FALSE))
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
43 }
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
44
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
45 startTime = Sys.time()
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
46 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
47
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
48 # Load dependencies
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
49 cat("Loading dependencies...\n")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
50 suppressPackageStartupMessages(library(Rsamtools, quietly=TRUE))
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
51
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
52 # Initializing hash table (as env) with motif names and loading tabix file
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
53 cat("Loading motif database and initializing hash table...\n")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
54 motifTable = new.env()
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
55 motifTbx <- TabixFile(motifDB)
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
56
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
57 # Loading input bed file, convert integer columns to numeric, name columns
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
58 cat("Loading region file...\n")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
59 regionsDF = read_bed(inBed)
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
60 dfTemp = sapply(regionsDF, is.integer)
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
61 regionsDF[dfTemp] = lapply(regionsDF[dfTemp], as.numeric)
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
62 names(regionsDF)[names(regionsDF) == "V1"] = "chr"
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
63 names(regionsDF)[names(regionsDF) == "V2"] = "start"
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
64 names(regionsDF)[names(regionsDF) == "V3"] = "end"
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
65
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
66 # Filtering regions to exclude chromosomes not in motif database
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
67 cat("Determining intersection counts...\n")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
68 motifTbxChrs = seqnamesTabix(motifTbx)
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
69 regionsDFFilter = subset(regionsDF, chr %in% motifTbxChrs)
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
70
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
71 # Loading regions into GRanges object and scanning motif tabix database
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
72 # Region end is incremented by 1 since scanTabix querying is inclusive for
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
73 # position start but exclusive for position end.
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
74 param = GRanges(regionsDFFilter$chr, IRanges(regionsDFFilter$start,
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
75 end=regionsDFFilter$end + 1))
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
76 regionsIntersects = scanTabix(motifTbx, param=param)
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
77
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
78 # Parsing result list and updating motif count hash table
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
79 cat("Parsing result list...\n")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
80 for(regionIntersects in regionsIntersects) {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
81 for(regionIntersect in strsplit(regionIntersects, " ")) {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
82 intersectMotif = strsplit(regionIntersect, "\t")[[1]][4]
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
83 if(is.null(motifTable[[intersectMotif]])) {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
84 motifTable[[intersectMotif]] = 1
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
85 } else {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
86 motifTable[[intersectMotif]] = motifTable[[intersectMotif]] + 1
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
87 }
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
88 }
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
89 }
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
90
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
91 # Converting motif count hash table to an integer vector for output
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
92 counts = integer(length = length(ls(motifTable)))
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
93 names(counts) = ls(motifTable)
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
94 for(motifName in ls(motifTable)) {
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
95 counts[motifName] = as.integer(motifTable[[motifName]])
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
96 }
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
97
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
98 # Outputting intersection counts to tab delineated file
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
99 cat("Outputting to file...\n")
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
100 write.table(counts, outTab, quote=FALSE, sep="\t", row.names=TRUE, col.names=FALSE)
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
101 cat("Done. Job started at:", format(startTime, "%a %b %d %X %Y."),
9a84f76db861 Fixed RMOTIF_PATH env variable for toolshed deployment
jeremyjliu
parents:
diff changeset
102 "Job ended at:", format(Sys.time(), "%a %b %d %X %Y."), "\n")