Mercurial > repos > nikos > rna_probing
comparison rna_probing_coverage.R @ 0:83dfe38f6a09 draft
Uploaded
| author | nikos |
|---|---|
| date | Tue, 04 Nov 2014 14:28:45 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:83dfe38f6a09 |
|---|---|
| 1 ## Setup R error handling to go to stderr | |
| 2 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
| 3 | |
| 4 # we need that to not crash galaxy with an UTF8 error on German LC settings. | |
| 5 #Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
| 6 | |
| 7 suppressMessages(library('getopt')) | |
| 8 | |
| 9 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) | |
| 10 args <- commandArgs(trailingOnly = TRUE) | |
| 11 | |
| 12 #get options, using the spec as defined by the enclosed list. | |
| 13 #we read the options from the default: commandArgs(TRUE). | |
| 14 spec = matrix(c( | |
| 15 'verbose', 'v', 2, "integer", | |
| 16 'help' , 'h', 0, "logical", | |
| 17 'sample', 's', 1, "character", | |
| 18 'control', 'c', 2, "character", | |
| 19 'window', 'w', 1, "integer", | |
| 20 'ball', 'b', 1, "double", | |
| 21 'peak', 'p', 1, "integer", | |
| 22 'output', 'o', 1, "character", | |
| 23 'counts', 'x', 1, "character", | |
| 24 'ratio', 'y', 1, "character", | |
| 25 'priming', 'z', 1, "character", | |
| 26 'plots', 'l', 2, "character" | |
| 27 ), byrow=TRUE, ncol=4); | |
| 28 | |
| 29 opt = getopt(spec); | |
| 30 | |
| 31 # if help was asked for print a friendly message | |
| 32 # and exit with a non-zero error code | |
| 33 if ( !is.null(opt$help) ) { | |
| 34 cat(getopt(spec, usage=TRUE)); | |
| 35 q(status=1); | |
| 36 } | |
| 37 | |
| 38 | |
| 39 #This is streamlined protocol that works only with reads mapped to one RNA molecule (16S rRNA) #For plot used in NAR preparation (without two bottom plots) see under many #'s | |
| 40 | |
| 41 suppressMessages(library('GenomicRanges')); | |
| 42 #setwd("/home/nikos/rna_probing_galaxy") | |
| 43 | |
| 44 #Inputs: | |
| 45 #Options: | |
| 46 window_size <- opt$window | |
| 47 ball_size <- opt$ball | |
| 48 peak <- opt$peak | |
| 49 | |
| 50 #Initialize variables: | |
| 51 counts <- list() | |
| 52 gene_coverage <- c() | |
| 53 gene_counts <- c() | |
| 54 gene_ratio <- c() | |
| 55 gene_priming <- c() | |
| 56 #End of initializing | |
| 57 | |
| 58 ####Read in datasets: | |
| 59 counts[[1]] <- read.table( opt$sample ) | |
| 60 | |
| 61 #if ( !is.null( opt$control ) ) { | |
| 62 # counts[[2]] <- read.table( opt$control ) | |
| 63 #} | |
| 64 ##### | |
| 65 | |
| 66 #Filter out the short inserts not to deal with size selection correctors. | |
| 67 for(i in seq(1,length(counts))){ | |
| 68 counts[[i]] <- subset(counts[[i]], (counts[[i]][,3]-counts[[i]][,2])>=peak) | |
| 69 } | |
| 70 | |
| 71 ############Initilize matrices for storing data: | |
| 72 counter <-1 | |
| 73 for(i in seq(1,length(counts))){ | |
| 74 temp_storage <- counts[[i]] | |
| 75 gene_coverage <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_coverage))), ncol=counter) | |
| 76 gene_counts <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_counts))), ncol=counter) | |
| 77 gene_priming <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_priming))), ncol=counter) | |
| 78 gene_ratio <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_ratio))), ncol=counter) | |
| 79 counter <- counter+1 | |
| 80 } | |
| 81 ###########End of Initilize matrices for storing data | |
| 82 | |
| 83 ################## | |
| 84 ##Fill in the matrices with data | |
| 85 counter <-1 | |
| 86 for(i in seq(1,length(counts))){ | |
| 87 temp_storage <- counts[[i]] | |
| 88 cover <- coverage(IRanges(start=temp_storage[,2], end=(temp_storage[,3]-peak)), weight=temp_storage[,4]) | |
| 89 gene_coverage[1:length((rep(runValue(cover), runLength(cover)))),counter] <- c((rep(runValue(cover), runLength(cover)))) | |
| 90 stopping_reads <- aggregate(temp_storage[,4]~temp_storage[,2],temp_storage, sum) | |
| 91 gene_counts[stopping_reads[,1],counter] <- stopping_reads[,2] | |
| 92 gene_counts[is.na(gene_counts)] <- 0 | |
| 93 priming_reads <- aggregate(temp_storage[,4]~temp_storage[,3],temp_storage, sum) | |
| 94 gene_priming[priming_reads[,1],counter] <- priming_reads[,2] | |
| 95 gene_priming[is.na(gene_priming)] <- 0 | |
| 96 counter <- counter+1 | |
| 97 } | |
| 98 | |
| 99 #End of filling in | |
| 100 gene_ratio <- gene_counts/gene_coverage | |
| 101 | |
| 102 ###Export to Galaxy | |
| 103 data <- data.frame(gene_counts, gene_coverage, gene_priming, gene_ratio ) | |
| 104 colnames(data) <- c("Read counts", "Effective Coverage EUC", "Termination EUC", "TCR") | |
| 105 | |
| 106 write.table( data, opt$output, sep = "\t", quote = F, row.names = F) | |
| 107 | |
| 108 | |
| 109 #Return plots | |
| 110 if ( !is.null(opt$plots) ) { | |
| 111 pdf(opt$plots) | |
| 112 | |
| 113 # Termination signal | |
| 114 plot(gene_counts, type= 'l', main = "Termination signal", ylab = "", xlab = "Position") | |
| 115 | |
| 116 # Priming signal | |
| 117 plot(gene_priming, type= 'l', main = "Priming signal", ylab = "", xlab = "Position") | |
| 118 # Effective Coverage | |
| 119 | |
| 120 y <- gene_coverage | |
| 121 y[is.na(y)] <- 0 | |
| 122 x <- seq_along(y) | |
| 123 y2 <- rep(y, each=2) | |
| 124 y2 <- y2[-length(y2)] | |
| 125 x2 <- rep(x, each=2)[-1] | |
| 126 x3 <- c(min(x2), x2, max(x2)) | |
| 127 y3 <- c(0, y2, 0) | |
| 128 | |
| 129 # because polygon() is dumb and wants a pre-existing plot | |
| 130 plot(x, y, ylim=c(0, max(y)), type="n", main = "Effective Coverage", ylab = "", xlab = "Position") | |
| 131 | |
| 132 polygon(x3, y3, border=NA, col="black") | |
| 133 lines(x2, y2) | |
| 134 | |
| 135 # Termination Coverage Ratio | |
| 136 plot(gene_ratio, type= 'l', main = "Termination Coverage Ratio", ylab = "", xlab = "Position") | |
| 137 dump <- dev.off() | |
| 138 } |
