Mercurial > repos > messersc > jamm
view peakhelper.r @ 0:d42f4d78c85e draft
Uploaded
author | messersc |
---|---|
date | Wed, 17 Dec 2014 10:40:23 -0500 |
parents | |
children | 243f75d0ed6e |
line wrap: on
line source
######################################################################################### #### Fixes output of peakfinder.r to produce proper narrowPeak format with proper scores #### R script ######################################################################################## # ======================= # User-defined variables # ======================= options(warn = -1, scipen = 1000) #R will not report any warnings (warn = -1), R will not use scientific notation (scipen = 1000) #=======================> DONE! # ============================================== # Parsing Arguments (source: phantom SPP script) # ============================================== args = commandArgs(trailingOnly = TRUE) # Read Arguments from command line #Parsing arguments and storing values for (each.arg in args) { #bed file names if (grepl('^-filelist=',each.arg)) { arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on = if (! is.na(arg.split[2]) ) { bednames <- arg.split[2] } else { stop('No bed file names') } } } options(stringsAsFactors = FALSE) #=======================> DONE! # ============================================== # Required Libraries check (Source: http://r.789695.n4.nabble.com/test-if-a-package-is-installed-td1750671.htm) # ============================================== if ((is.element('sqldf', installed.packages()[,1]) == FALSE)) { stop("R package 'sqldf' is required. Please install it!") } suppressPackageStartupMessages(library("sqldf")) suppressPackageStartupMessages(library("tcltk")) #=======================> DONE! # ================= # Custom Functions # ================= score <- function(x){ x = ((x-min(x))/(max(x)-min(x)))*10000 return(x) } fixnpf = function(bedfile) { writethis = read.table(bedfile, header=FALSE) message(paste0("Minimum Peak Width Used to Produce Filtered List: ", writethis$V11[1])) writethis$V9 = p.adjust(writethis$V8, method = "BH") writethis$V8[writethis$V8 == 0] = 10 writethis$V8[writethis$V8 == 10] = min(writethis$V8) writethis$V8 = -(log10(writethis$V8)) writethis$V9[writethis$V9 == 0] = 10 writethis$V9[writethis$V9 == 10] = min(writethis$V9) writethis$V9 = -(log10(writethis$V9)) writethis$V7 = as.numeric(writethis$V7) * writethis$V9 writethis$V5 = score(as.numeric(writethis$V7)) #find peak score cutoff (pareto distribution) sizes = writethis$V3 - writethis$V2 scores = writethis$V7 ss = cbind(sizes, scores) ss = ss[ss[,1] > writethis$V11[1],,drop=FALSE] scores = ss[,2] scores = scores[scores > 0] num = length(scores) if (length(scores) > 300000) { scores = scores[1:300000] num = 300000 } den = log(min(scores)) den = sum(log(scores) - den) alpha = num / den geom = (exp(1/alpha)) * (min(scores)) message(paste0("Minimum Peak Score Used to Produce Filtered List: ", geom)) #done now write something write(paste(writethis$V1, writethis$V2, writethis$V3, writethis$V4, writethis$V5, ".", writethis$V7, writethis$V8, writethis$V9, writethis$V10, writethis$V11, geom, sep = "\t"), file = bedfile, ncolumns = 1) return(bedfile) } #=======================> DONE! # ================== # Fixing narrowpeak # ================== bedfiles = fixnpf(bednames) #read in all bed files (samples and control) #=======================> DONE!