Mercurial > repos > messersc > jamm
diff peakhelper.r @ 0:d42f4d78c85e draft
Uploaded
author | messersc |
---|---|
date | Wed, 17 Dec 2014 10:40:23 -0500 |
parents | |
children | 243f75d0ed6e |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peakhelper.r Wed Dec 17 10:40:23 2014 -0500 @@ -0,0 +1,110 @@ +######################################################################################### +#### Fixes output of peakfinder.r to produce proper narrowPeak format with proper scores +#### R script +######################################################################################## + + + + +# ======================= +# User-defined variables +# ======================= +options(warn = -1, scipen = 1000) #R will not report any warnings (warn = -1), R will not use scientific notation (scipen = 1000) +#=======================> DONE! + + + + +# ============================================== +# Parsing Arguments (source: phantom SPP script) +# ============================================== +args = commandArgs(trailingOnly = TRUE) # Read Arguments from command line + + +#Parsing arguments and storing values +for (each.arg in args) { + #bed file names + if (grepl('^-filelist=',each.arg)) { + arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on = + if (! is.na(arg.split[2]) ) { + bednames <- arg.split[2] + } else { + stop('No bed file names') + } + } +} +options(stringsAsFactors = FALSE) +#=======================> DONE! + + + + +# ============================================== +# Required Libraries check (Source: http://r.789695.n4.nabble.com/test-if-a-package-is-installed-td1750671.htm) +# ============================================== +if ((is.element('sqldf', installed.packages()[,1]) == FALSE)) { + stop("R package 'sqldf' is required. Please install it!") +} +suppressPackageStartupMessages(library("sqldf")) +suppressPackageStartupMessages(library("tcltk")) +#=======================> DONE! + + + + +# ================= +# Custom Functions +# ================= +score <- function(x){ + x = ((x-min(x))/(max(x)-min(x)))*10000 + return(x) +} + +fixnpf = function(bedfile) { + + writethis = read.table(bedfile, header=FALSE) + + + message(paste0("Minimum Peak Width Used to Produce Filtered List: ", writethis$V11[1])) + writethis$V9 = p.adjust(writethis$V8, method = "BH") + writethis$V8[writethis$V8 == 0] = 10 + writethis$V8[writethis$V8 == 10] = min(writethis$V8) + writethis$V8 = -(log10(writethis$V8)) + writethis$V9[writethis$V9 == 0] = 10 + writethis$V9[writethis$V9 == 10] = min(writethis$V9) + writethis$V9 = -(log10(writethis$V9)) + writethis$V7 = as.numeric(writethis$V7) * writethis$V9 + writethis$V5 = score(as.numeric(writethis$V7)) + + + #find peak score cutoff (pareto distribution) + sizes = writethis$V3 - writethis$V2 + scores = writethis$V7 + ss = cbind(sizes, scores) + ss = ss[ss[,1] > writethis$V11[1],,drop=FALSE] + scores = ss[,2] + scores = scores[scores > 0] + num = length(scores) + if (length(scores) > 300000) { + scores = scores[1:300000] + num = 300000 + } + den = log(min(scores)) + den = sum(log(scores) - den) + alpha = num / den + geom = (exp(1/alpha)) * (min(scores)) + message(paste0("Minimum Peak Score Used to Produce Filtered List: ", geom)) + + #done now write something + write(paste(writethis$V1, writethis$V2, writethis$V3, writethis$V4, writethis$V5, ".", writethis$V7, writethis$V8, writethis$V9, writethis$V10, writethis$V11, geom, sep = "\t"), file = bedfile, ncolumns = 1) + return(bedfile) +} +#=======================> DONE! + + + +# ================== +# Fixing narrowpeak +# ================== +bedfiles = fixnpf(bednames) #read in all bed files (samples and control) +#=======================> DONE!