comparison region_motif_compare.r @ 35:4ce22698acb0 draft

Uploaded
author jeremyjliu
date Thu, 26 Feb 2015 22:54:09 -0500
parents 9525574f700f
children b868314320e2
comparison
equal deleted inserted replaced
34:fe8a884363df 35:4ce22698acb0
2 # Description: Reads in two count files and determines enriched and depleted 2 # Description: Reads in two count files and determines enriched and depleted
3 # motifs (or any location based feature) based on poisson tests and gc 3 # motifs (or any location based feature) based on poisson tests and gc
4 # corrections. All enrichment ratios relative to overall count / gc ratios. 4 # corrections. All enrichment ratios relative to overall count / gc ratios.
5 # Author: Jeremy liu 5 # Author: Jeremy liu
6 # Email: jeremy.liu@yale.edu 6 # Email: jeremy.liu@yale.edu
7 # Date: 15/02/04 7 # Date: 15/02/11
8 # Note: This script is meant to be invoked with the following command 8 # Note: This script is meant to be invoked with the following command
9 # R --slave --vanilla -f ./region_motif_compare.r --args <workingdir> <db> <intab1> <intab2> 9 # R --slave --vanilla -f ./region_motif_compare.r --args <workingdir> <pwm_file>
10 # <enriched_tab> <depleted_tab> <plots_png> 10 # <intab1> <intab2> <enriched_tab> <depleted_tab> <plots_png>
11 # <workingdir> is working directory of galaxy installation 11 # <workingdir> is working directory of galaxy installation
12 # <db> types: "t" test, "p" pouya, "j" jaspar jolma, "m" mouse, "c" combined 12 # Dependencies: region_motif_data_manager, plotting.r
13 # Dependencies: plotting.r
14 13
15 # Auxiliary function to concatenate multiple strings 14 # Auxiliary function to concatenate multiple strings
16 concat <- function(...) { 15 concat <- function(...) {
17 input_list <- list(...) 16 input_list <- list(...)
18 return(paste(input_list, sep="", collapse="")) 17 return(paste(input_list, sep="", collapse=""))
22 options(warn=-1) 21 options(warn=-1)
23 22
24 # Set common and data directories 23 # Set common and data directories
25 args <- commandArgs() 24 args <- commandArgs()
26 workingDir = args[7] 25 workingDir = args[7]
27 dbCode = args[8] 26 pwmFile = args[8].split(',')[0] # If duplicate entires, take first one
28 # dbCode "c" implemented when pwmFile is loaded
29 if (dbCode == "t" | dbCode == "p") {
30 pwmFile = concat(workingDir, "/pwms/pouya.pwms.from.seq.RData")
31 } else if (dbCode == "j") {
32 pwmFile = concat(workingDir, "/pwms/jaspar.jolma.pwms.from.seq.RData")
33 } else if (dbCode == "m") {
34 pwmFile = concat(workingDir, "/pwms/mm9.pwms.from.seq.RData")
35 } else if (dbCode == "c") { # rest of dbCode "c" implemented when pwmFile loaded
36 pwmFile = concat(workingDir, "/pwms/pouya.pwms.from.seq.RData")
37 pwmFile2 = concat(workingDir, "/pwms/jaspar.jolma.pwms.from.seq.RData")
38 } else {
39 pwmFile = concat(workingDir, "/pwms/pouya.pwms.from.seq.RData")
40 }
41 27
42 # Set input and reference files 28 # Set input and reference files
43 inTab1 = args[9] 29 inTab1 = args[9]
44 inTab2 = args[10] 30 inTab2 = args[10]
45 enrichTab = args[11] 31 enrichTab = args[11]
61 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n") 47 cat("Running ... Started at:", format(startTime, "%a %b %d %X %Y"), "...\n")
62 48
63 # Loading motif position weight matrix (pwm) file and input tab file 49 # Loading motif position weight matrix (pwm) file and input tab file
64 cat("Loading and reading input region motif count files...\n") 50 cat("Loading and reading input region motif count files...\n")
65 load(pwmFile) # pwms data structure 51 load(pwmFile) # pwms data structure
66 if (dbCode == "c") { # Remaining implementation of dbCode "c" combined 52 #if (dbCode == "c") { # Remaining implementation of dbCode "c" combined
67 temp = pwms 53 # temp = pwms
68 load(pwmFile2) 54 # load(pwmFile2)
69 pwms = append(temp, pwms) 55 # pwms = append(temp, pwms)
70 } 56 #}
71 region1DF = read_tsv(inTab1) 57 region1DF = read_tsv(inTab1)
72 region2DF = read_tsv(inTab2) 58 region2DF = read_tsv(inTab2)
73 region1Counts = region1DF$counts 59 region1Counts = region1DF$counts
74 region2Counts = region2DF$counts 60 region2Counts = region2DF$counts
75 names(region1Counts) = region1DF$motif 61 names(region1Counts) = region1DF$motif