comparison region_motif_intersect.r @ 13:00627aa25b6c draft

Update relative directories within intersect.r
author jeremyjliu
date Fri, 07 Nov 2014 12:38:18 -0500
parents 19d2cffb8db3
children
comparison
equal deleted inserted replaced
12:ef85d5e787d7 13:00627aa25b6c
17 } 17 }
18 18
19 # Set common and data directories 19 # Set common and data directories
20 args <- commandArgs() 20 args <- commandArgs()
21 workingDir = args[7] 21 workingDir = args[7]
22 commonDir = concat(workingDir, "/tools/my_tools") 22 dbDir = concat(workingDir, "/region_motif_db")
23 dbCode = args[8] 23 dbCode = args[8]
24 if (dbCode == "t") { 24 if (dbCode == "t") {
25 motifDB = concat(commonDir, "/region_motif_db/pouya_test_motifs.bed.bgz") 25 motifDB = concat(dbDir, "/pouya_test_motifs.bed.bgz")
26 } else if (dbCode == "p") { 26 } else if (dbCode == "p") {
27 motifDB = concat(commonDir, "/region_motif_db/pouya_motifs.bed.bgz") 27 motifDB = concat(dbDir, "/pouya_motifs.bed.bgz")
28 } else if (dbCode == "j") { 28 } else if (dbCode == "j") {
29 motifDB = concat(commonDir, "/region_motif_db/jaspar_jolma_motifs.bed.bgz") 29 motifDB = concat(dbDir, "/jaspar_jolma_motifs.bed.bgz")
30 } else if (dbCode == "m") { 30 } else if (dbCode == "m") {
31 motifDB = concat(commonDir, "/region_motif_db/mm9_motifs_split.bed.bgz") 31 motifDB = concat(dbDir, "/mm9_motifs.bed.bgz")
32 } else { 32 } else {
33 motifDB = concat(commonDir, "/region_motif_db/pouya_motifs.bed.bgz") 33 motifDB = concat(dbDir, "/pouya_motifs.bed.bgz")
34 } 34 }
35 35
36 # Set input and reference files, comment to toggle commmand line arguments 36 # Set input and reference files, comment to toggle commmand line arguments
37 inBed = args[9] 37 inBed = args[9]
38 outTab = args[10] 38 outTab = args[10]
44 44
45 startTime = Sys.time() 45 startTime = Sys.time()
46 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n") 46 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n")
47 47
48 # Load dependencies 48 # Load dependencies
49 #cat("Loading dependencies...\n") 49 cat("Loading dependencies...\n")
50 suppressPackageStartupMessages(library(Rsamtools, quietly=TRUE)) 50 suppressPackageStartupMessages(library(Rsamtools, quietly=TRUE))
51 51
52 # Initializing hash table (as env) with motif names and loading tabix file 52 # Initializing hash table (as env) with motif names and loading tabix file
53 #cat("Loading motif database and initializing hash table...\n") 53 cat("Loading motif database and initializing hash table...\n")
54 motifTable = new.env() 54 motifTable = new.env()
55 motifTbx <- TabixFile(motifDB) 55 motifTbx <- TabixFile(motifDB)
56 56
57 # Loading input bed file, convert integer columns to numeric, name columns 57 # Loading input bed file, convert integer columns to numeric, name columns
58 #cat("Loading region file...\n") 58 cat("Loading region file...\n")
59 regionsDF = read_bed(inBed) 59 regionsDF = read_bed(inBed)
60 dfTemp = sapply(regionsDF, is.integer) 60 dfTemp = sapply(regionsDF, is.integer)
61 regionsDF[dfTemp] = lapply(regionsDF[dfTemp], as.numeric) 61 regionsDF[dfTemp] = lapply(regionsDF[dfTemp], as.numeric)
62 names(regionsDF)[names(regionsDF) == "V1"] = "chr" 62 names(regionsDF)[names(regionsDF) == "V1"] = "chr"
63 names(regionsDF)[names(regionsDF) == "V2"] = "start" 63 names(regionsDF)[names(regionsDF) == "V2"] = "start"
64 names(regionsDF)[names(regionsDF) == "V3"] = "end" 64 names(regionsDF)[names(regionsDF) == "V3"] = "end"
65 65
66 # Filtering regions to exclude chromosomes not in motif database 66 # Filtering regions to exclude chromosomes not in motif database
67 #cat("Determining intersection counts...\n") 67 cat("Determining intersection counts...\n")
68 motifTbxChrs = seqnamesTabix(motifTbx) 68 motifTbxChrs = seqnamesTabix(motifTbx)
69 regionsDFFilter = subset(regionsDF, chr %in% motifTbxChrs) 69 regionsDFFilter = subset(regionsDF, chr %in% motifTbxChrs)
70 70
71 # Loading regions into GRanges object and scanning motif tabix database 71 # Loading regions into GRanges object and scanning motif tabix database
72 # Region end is incremented by 1 since scanTabix querying is inclusive for 72 # Region end is incremented by 1 since scanTabix querying is inclusive for
74 param = GRanges(regionsDFFilter$chr, IRanges(regionsDFFilter$start, 74 param = GRanges(regionsDFFilter$chr, IRanges(regionsDFFilter$start,
75 end=regionsDFFilter$end + 1)) 75 end=regionsDFFilter$end + 1))
76 regionsIntersects = scanTabix(motifTbx, param=param) 76 regionsIntersects = scanTabix(motifTbx, param=param)
77 77
78 # Parsing result list and updating motif count hash table 78 # Parsing result list and updating motif count hash table
79 #cat("Parsing result list...\n") 79 cat("Parsing result list...\n")
80 for(regionIntersects in regionsIntersects) { 80 for(regionIntersects in regionsIntersects) {
81 for(regionIntersect in strsplit(regionIntersects, " ")) { 81 for(regionIntersect in strsplit(regionIntersects, " ")) {
82 intersectMotif = strsplit(regionIntersect, "\t")[[1]][4] 82 intersectMotif = strsplit(regionIntersect, "\t")[[1]][4]
83 if(is.null(motifTable[[intersectMotif]])) { 83 if(is.null(motifTable[[intersectMotif]])) {
84 motifTable[[intersectMotif]] = 1 84 motifTable[[intersectMotif]] = 1
94 for(motifName in ls(motifTable)) { 94 for(motifName in ls(motifTable)) {
95 counts[motifName] = as.integer(motifTable[[motifName]]) 95 counts[motifName] = as.integer(motifTable[[motifName]])
96 } 96 }
97 97
98 # Outputting intersection counts to tab delineated file 98 # Outputting intersection counts to tab delineated file
99 #cat("Outputting to file...\n") 99 cat("Outputting to file...\n")
100 write.table(counts, outTab, quote=FALSE, sep="\t", row.names=TRUE, col.names=FALSE) 100 write.table(counts, outTab, quote=FALSE, sep="\t", row.names=TRUE, col.names=FALSE)
101 cat("Done. Job started at:", format(startTime, "%a %b %d %X %Y."), 101 cat("Done. Job started at:", format(startTime, "%a %b %d %X %Y."),
102 "Job ended at:", format(Sys.time(), "%a %b %d %X %Y."), "\n") 102 "Job ended at:", format(Sys.time(), "%a %b %d %X %Y."), "\n")