Mercurial > repos > jeremyjliu > region_motif_enrichment
comparison region_motif_intersect.r @ 13:00627aa25b6c draft
Update relative directories within intersect.r
author | jeremyjliu |
---|---|
date | Fri, 07 Nov 2014 12:38:18 -0500 |
parents | 19d2cffb8db3 |
children |
comparison
equal
deleted
inserted
replaced
12:ef85d5e787d7 | 13:00627aa25b6c |
---|---|
17 } | 17 } |
18 | 18 |
19 # Set common and data directories | 19 # Set common and data directories |
20 args <- commandArgs() | 20 args <- commandArgs() |
21 workingDir = args[7] | 21 workingDir = args[7] |
22 commonDir = concat(workingDir, "/tools/my_tools") | 22 dbDir = concat(workingDir, "/region_motif_db") |
23 dbCode = args[8] | 23 dbCode = args[8] |
24 if (dbCode == "t") { | 24 if (dbCode == "t") { |
25 motifDB = concat(commonDir, "/region_motif_db/pouya_test_motifs.bed.bgz") | 25 motifDB = concat(dbDir, "/pouya_test_motifs.bed.bgz") |
26 } else if (dbCode == "p") { | 26 } else if (dbCode == "p") { |
27 motifDB = concat(commonDir, "/region_motif_db/pouya_motifs.bed.bgz") | 27 motifDB = concat(dbDir, "/pouya_motifs.bed.bgz") |
28 } else if (dbCode == "j") { | 28 } else if (dbCode == "j") { |
29 motifDB = concat(commonDir, "/region_motif_db/jaspar_jolma_motifs.bed.bgz") | 29 motifDB = concat(dbDir, "/jaspar_jolma_motifs.bed.bgz") |
30 } else if (dbCode == "m") { | 30 } else if (dbCode == "m") { |
31 motifDB = concat(commonDir, "/region_motif_db/mm9_motifs_split.bed.bgz") | 31 motifDB = concat(dbDir, "/mm9_motifs.bed.bgz") |
32 } else { | 32 } else { |
33 motifDB = concat(commonDir, "/region_motif_db/pouya_motifs.bed.bgz") | 33 motifDB = concat(dbDir, "/pouya_motifs.bed.bgz") |
34 } | 34 } |
35 | 35 |
36 # Set input and reference files, comment to toggle commmand line arguments | 36 # Set input and reference files, comment to toggle commmand line arguments |
37 inBed = args[9] | 37 inBed = args[9] |
38 outTab = args[10] | 38 outTab = args[10] |
44 | 44 |
45 startTime = Sys.time() | 45 startTime = Sys.time() |
46 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n") | 46 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n") |
47 | 47 |
48 # Load dependencies | 48 # Load dependencies |
49 #cat("Loading dependencies...\n") | 49 cat("Loading dependencies...\n") |
50 suppressPackageStartupMessages(library(Rsamtools, quietly=TRUE)) | 50 suppressPackageStartupMessages(library(Rsamtools, quietly=TRUE)) |
51 | 51 |
52 # Initializing hash table (as env) with motif names and loading tabix file | 52 # Initializing hash table (as env) with motif names and loading tabix file |
53 #cat("Loading motif database and initializing hash table...\n") | 53 cat("Loading motif database and initializing hash table...\n") |
54 motifTable = new.env() | 54 motifTable = new.env() |
55 motifTbx <- TabixFile(motifDB) | 55 motifTbx <- TabixFile(motifDB) |
56 | 56 |
57 # Loading input bed file, convert integer columns to numeric, name columns | 57 # Loading input bed file, convert integer columns to numeric, name columns |
58 #cat("Loading region file...\n") | 58 cat("Loading region file...\n") |
59 regionsDF = read_bed(inBed) | 59 regionsDF = read_bed(inBed) |
60 dfTemp = sapply(regionsDF, is.integer) | 60 dfTemp = sapply(regionsDF, is.integer) |
61 regionsDF[dfTemp] = lapply(regionsDF[dfTemp], as.numeric) | 61 regionsDF[dfTemp] = lapply(regionsDF[dfTemp], as.numeric) |
62 names(regionsDF)[names(regionsDF) == "V1"] = "chr" | 62 names(regionsDF)[names(regionsDF) == "V1"] = "chr" |
63 names(regionsDF)[names(regionsDF) == "V2"] = "start" | 63 names(regionsDF)[names(regionsDF) == "V2"] = "start" |
64 names(regionsDF)[names(regionsDF) == "V3"] = "end" | 64 names(regionsDF)[names(regionsDF) == "V3"] = "end" |
65 | 65 |
66 # Filtering regions to exclude chromosomes not in motif database | 66 # Filtering regions to exclude chromosomes not in motif database |
67 #cat("Determining intersection counts...\n") | 67 cat("Determining intersection counts...\n") |
68 motifTbxChrs = seqnamesTabix(motifTbx) | 68 motifTbxChrs = seqnamesTabix(motifTbx) |
69 regionsDFFilter = subset(regionsDF, chr %in% motifTbxChrs) | 69 regionsDFFilter = subset(regionsDF, chr %in% motifTbxChrs) |
70 | 70 |
71 # Loading regions into GRanges object and scanning motif tabix database | 71 # Loading regions into GRanges object and scanning motif tabix database |
72 # Region end is incremented by 1 since scanTabix querying is inclusive for | 72 # Region end is incremented by 1 since scanTabix querying is inclusive for |
74 param = GRanges(regionsDFFilter$chr, IRanges(regionsDFFilter$start, | 74 param = GRanges(regionsDFFilter$chr, IRanges(regionsDFFilter$start, |
75 end=regionsDFFilter$end + 1)) | 75 end=regionsDFFilter$end + 1)) |
76 regionsIntersects = scanTabix(motifTbx, param=param) | 76 regionsIntersects = scanTabix(motifTbx, param=param) |
77 | 77 |
78 # Parsing result list and updating motif count hash table | 78 # Parsing result list and updating motif count hash table |
79 #cat("Parsing result list...\n") | 79 cat("Parsing result list...\n") |
80 for(regionIntersects in regionsIntersects) { | 80 for(regionIntersects in regionsIntersects) { |
81 for(regionIntersect in strsplit(regionIntersects, " ")) { | 81 for(regionIntersect in strsplit(regionIntersects, " ")) { |
82 intersectMotif = strsplit(regionIntersect, "\t")[[1]][4] | 82 intersectMotif = strsplit(regionIntersect, "\t")[[1]][4] |
83 if(is.null(motifTable[[intersectMotif]])) { | 83 if(is.null(motifTable[[intersectMotif]])) { |
84 motifTable[[intersectMotif]] = 1 | 84 motifTable[[intersectMotif]] = 1 |
94 for(motifName in ls(motifTable)) { | 94 for(motifName in ls(motifTable)) { |
95 counts[motifName] = as.integer(motifTable[[motifName]]) | 95 counts[motifName] = as.integer(motifTable[[motifName]]) |
96 } | 96 } |
97 | 97 |
98 # Outputting intersection counts to tab delineated file | 98 # Outputting intersection counts to tab delineated file |
99 #cat("Outputting to file...\n") | 99 cat("Outputting to file...\n") |
100 write.table(counts, outTab, quote=FALSE, sep="\t", row.names=TRUE, col.names=FALSE) | 100 write.table(counts, outTab, quote=FALSE, sep="\t", row.names=TRUE, col.names=FALSE) |
101 cat("Done. Job started at:", format(startTime, "%a %b %d %X %Y."), | 101 cat("Done. Job started at:", format(startTime, "%a %b %d %X %Y."), |
102 "Job ended at:", format(Sys.time(), "%a %b %d %X %Y."), "\n") | 102 "Job ended at:", format(Sys.time(), "%a %b %d %X %Y."), "\n") |