Mercurial > repos > nikos > rna_probing
diff rna_probing_coverage.R @ 0:83dfe38f6a09 draft
Uploaded
| author | nikos |
|---|---|
| date | Tue, 04 Nov 2014 14:28:45 -0500 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rna_probing_coverage.R Tue Nov 04 14:28:45 2014 -0500 @@ -0,0 +1,138 @@ +## Setup R error handling to go to stderr +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +#Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +suppressMessages(library('getopt')) + +options(stringAsfactors = FALSE, useFancyQuotes = FALSE) +args <- commandArgs(trailingOnly = TRUE) + +#get options, using the spec as defined by the enclosed list. +#we read the options from the default: commandArgs(TRUE). +spec = matrix(c( + 'verbose', 'v', 2, "integer", + 'help' , 'h', 0, "logical", + 'sample', 's', 1, "character", + 'control', 'c', 2, "character", + 'window', 'w', 1, "integer", + 'ball', 'b', 1, "double", + 'peak', 'p', 1, "integer", + 'output', 'o', 1, "character", + 'counts', 'x', 1, "character", + 'ratio', 'y', 1, "character", + 'priming', 'z', 1, "character", + 'plots', 'l', 2, "character" + ), byrow=TRUE, ncol=4); + +opt = getopt(spec); + +# if help was asked for print a friendly message +# and exit with a non-zero error code +if ( !is.null(opt$help) ) { + cat(getopt(spec, usage=TRUE)); + q(status=1); +} + + +#This is streamlined protocol that works only with reads mapped to one RNA molecule (16S rRNA) #For plot used in NAR preparation (without two bottom plots) see under many #'s + +suppressMessages(library('GenomicRanges')); +#setwd("/home/nikos/rna_probing_galaxy") + +#Inputs: +#Options: +window_size <- opt$window +ball_size <- opt$ball +peak <- opt$peak + +#Initialize variables: +counts <- list() +gene_coverage <- c() +gene_counts <- c() +gene_ratio <- c() +gene_priming <- c() +#End of initializing + +####Read in datasets: +counts[[1]] <- read.table( opt$sample ) + +#if ( !is.null( opt$control ) ) { +# counts[[2]] <- read.table( opt$control ) +#} +##### + +#Filter out the short inserts not to deal with size selection correctors. +for(i in seq(1,length(counts))){ + counts[[i]] <- subset(counts[[i]], (counts[[i]][,3]-counts[[i]][,2])>=peak) +} + +############Initilize matrices for storing data: +counter <-1 +for(i in seq(1,length(counts))){ + temp_storage <- counts[[i]] + gene_coverage <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_coverage))), ncol=counter) + gene_counts <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_counts))), ncol=counter) + gene_priming <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_priming))), ncol=counter) + gene_ratio <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_ratio))), ncol=counter) + counter <- counter+1 +} +###########End of Initilize matrices for storing data + +################## +##Fill in the matrices with data +counter <-1 +for(i in seq(1,length(counts))){ + temp_storage <- counts[[i]] + cover <- coverage(IRanges(start=temp_storage[,2], end=(temp_storage[,3]-peak)), weight=temp_storage[,4]) + gene_coverage[1:length((rep(runValue(cover), runLength(cover)))),counter] <- c((rep(runValue(cover), runLength(cover)))) + stopping_reads <- aggregate(temp_storage[,4]~temp_storage[,2],temp_storage, sum) + gene_counts[stopping_reads[,1],counter] <- stopping_reads[,2] + gene_counts[is.na(gene_counts)] <- 0 + priming_reads <- aggregate(temp_storage[,4]~temp_storage[,3],temp_storage, sum) + gene_priming[priming_reads[,1],counter] <- priming_reads[,2] + gene_priming[is.na(gene_priming)] <- 0 + counter <- counter+1 +} + +#End of filling in +gene_ratio <- gene_counts/gene_coverage + +###Export to Galaxy +data <- data.frame(gene_counts, gene_coverage, gene_priming, gene_ratio ) +colnames(data) <- c("Read counts", "Effective Coverage EUC", "Termination EUC", "TCR") + +write.table( data, opt$output, sep = "\t", quote = F, row.names = F) + + +#Return plots +if ( !is.null(opt$plots) ) { + pdf(opt$plots) + + # Termination signal + plot(gene_counts, type= 'l', main = "Termination signal", ylab = "", xlab = "Position") + + # Priming signal + plot(gene_priming, type= 'l', main = "Priming signal", ylab = "", xlab = "Position") + # Effective Coverage + + y <- gene_coverage + y[is.na(y)] <- 0 + x <- seq_along(y) + y2 <- rep(y, each=2) + y2 <- y2[-length(y2)] + x2 <- rep(x, each=2)[-1] + x3 <- c(min(x2), x2, max(x2)) + y3 <- c(0, y2, 0) + + # because polygon() is dumb and wants a pre-existing plot + plot(x, y, ylim=c(0, max(y)), type="n", main = "Effective Coverage", ylab = "", xlab = "Position") + + polygon(x3, y3, border=NA, col="black") + lines(x2, y2) + + # Termination Coverage Ratio + plot(gene_ratio, type= 'l', main = "Termination Coverage Ratio", ylab = "", xlab = "Position") + dump <- dev.off() +}
