annotate rna_probing_coverage.R @ 0:83dfe38f6a09 draft

Uploaded
author nikos
date Tue, 04 Nov 2014 14:28:45 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
1 ## Setup R error handling to go to stderr
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
2 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
3
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
4 # we need that to not crash galaxy with an UTF8 error on German LC settings.
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
5 #Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
6
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
7 suppressMessages(library('getopt'))
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
8
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
9 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
10 args <- commandArgs(trailingOnly = TRUE)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
11
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
12 #get options, using the spec as defined by the enclosed list.
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
13 #we read the options from the default: commandArgs(TRUE).
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
14 spec = matrix(c(
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
15 'verbose', 'v', 2, "integer",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
16 'help' , 'h', 0, "logical",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
17 'sample', 's', 1, "character",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
18 'control', 'c', 2, "character",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
19 'window', 'w', 1, "integer",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
20 'ball', 'b', 1, "double",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
21 'peak', 'p', 1, "integer",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
22 'output', 'o', 1, "character",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
23 'counts', 'x', 1, "character",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
24 'ratio', 'y', 1, "character",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
25 'priming', 'z', 1, "character",
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
26 'plots', 'l', 2, "character"
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
27 ), byrow=TRUE, ncol=4);
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
28
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
29 opt = getopt(spec);
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
30
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
31 # if help was asked for print a friendly message
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
32 # and exit with a non-zero error code
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
33 if ( !is.null(opt$help) ) {
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
34 cat(getopt(spec, usage=TRUE));
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
35 q(status=1);
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
36 }
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
37
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
38
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
39 #This is streamlined protocol that works only with reads mapped to one RNA molecule (16S rRNA) #For plot used in NAR preparation (without two bottom plots) see under many #'s
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
40
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
41 suppressMessages(library('GenomicRanges'));
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
42 #setwd("/home/nikos/rna_probing_galaxy")
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
43
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
44 #Inputs:
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
45 #Options:
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
46 window_size <- opt$window
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
47 ball_size <- opt$ball
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
48 peak <- opt$peak
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
49
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
50 #Initialize variables:
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
51 counts <- list()
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
52 gene_coverage <- c()
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
53 gene_counts <- c()
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
54 gene_ratio <- c()
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
55 gene_priming <- c()
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
56 #End of initializing
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
57
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
58 ####Read in datasets:
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
59 counts[[1]] <- read.table( opt$sample )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
60
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
61 #if ( !is.null( opt$control ) ) {
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
62 # counts[[2]] <- read.table( opt$control )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
63 #}
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
64 #####
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
65
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
66 #Filter out the short inserts not to deal with size selection correctors.
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
67 for(i in seq(1,length(counts))){
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
68 counts[[i]] <- subset(counts[[i]], (counts[[i]][,3]-counts[[i]][,2])>=peak)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
69 }
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
70
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
71 ############Initilize matrices for storing data:
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
72 counter <-1
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
73 for(i in seq(1,length(counts))){
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
74 temp_storage <- counts[[i]]
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
75 gene_coverage <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_coverage))), ncol=counter)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
76 gene_counts <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_counts))), ncol=counter)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
77 gene_priming <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_priming))), ncol=counter)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
78 gene_ratio <- matrix(nrow=max(c(temp_storage[,3], nrow(gene_ratio))), ncol=counter)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
79 counter <- counter+1
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
80 }
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
81 ###########End of Initilize matrices for storing data
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
82
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
83 ##################
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
84 ##Fill in the matrices with data
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
85 counter <-1
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
86 for(i in seq(1,length(counts))){
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
87 temp_storage <- counts[[i]]
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
88 cover <- coverage(IRanges(start=temp_storage[,2], end=(temp_storage[,3]-peak)), weight=temp_storage[,4])
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
89 gene_coverage[1:length((rep(runValue(cover), runLength(cover)))),counter] <- c((rep(runValue(cover), runLength(cover))))
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
90 stopping_reads <- aggregate(temp_storage[,4]~temp_storage[,2],temp_storage, sum)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
91 gene_counts[stopping_reads[,1],counter] <- stopping_reads[,2]
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
92 gene_counts[is.na(gene_counts)] <- 0
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
93 priming_reads <- aggregate(temp_storage[,4]~temp_storage[,3],temp_storage, sum)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
94 gene_priming[priming_reads[,1],counter] <- priming_reads[,2]
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
95 gene_priming[is.na(gene_priming)] <- 0
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
96 counter <- counter+1
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
97 }
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
98
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
99 #End of filling in
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
100 gene_ratio <- gene_counts/gene_coverage
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
101
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
102 ###Export to Galaxy
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
103 data <- data.frame(gene_counts, gene_coverage, gene_priming, gene_ratio )
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
104 colnames(data) <- c("Read counts", "Effective Coverage EUC", "Termination EUC", "TCR")
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
105
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
106 write.table( data, opt$output, sep = "\t", quote = F, row.names = F)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
107
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
108
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
109 #Return plots
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
110 if ( !is.null(opt$plots) ) {
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
111 pdf(opt$plots)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
112
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
113 # Termination signal
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
114 plot(gene_counts, type= 'l', main = "Termination signal", ylab = "", xlab = "Position")
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
115
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
116 # Priming signal
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
117 plot(gene_priming, type= 'l', main = "Priming signal", ylab = "", xlab = "Position")
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
118 # Effective Coverage
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
119
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
120 y <- gene_coverage
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
121 y[is.na(y)] <- 0
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
122 x <- seq_along(y)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
123 y2 <- rep(y, each=2)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
124 y2 <- y2[-length(y2)]
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
125 x2 <- rep(x, each=2)[-1]
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
126 x3 <- c(min(x2), x2, max(x2))
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
127 y3 <- c(0, y2, 0)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
128
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
129 # because polygon() is dumb and wants a pre-existing plot
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
130 plot(x, y, ylim=c(0, max(y)), type="n", main = "Effective Coverage", ylab = "", xlab = "Position")
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
131
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
132 polygon(x3, y3, border=NA, col="black")
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
133 lines(x2, y2)
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
134
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
135 # Termination Coverage Ratio
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
136 plot(gene_ratio, type= 'l', main = "Termination Coverage Ratio", ylab = "", xlab = "Position")
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
137 dump <- dev.off()
83dfe38f6a09 Uploaded
nikos
parents:
diff changeset
138 }