diff small_rna_map.r @ 5:d65045e976e6 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_map commit b673d39fbe79f5164ba6489b33cfa78ac238ee09
author artbio
date Sat, 22 Jul 2017 11:45:52 -0400
parents 2e0dc6032a98
children
line wrap: on
line diff
--- a/small_rna_map.r	Tue Jul 18 17:35:52 2017 -0400
+++ b/small_rna_map.r	Sat Jul 22 11:45:52 2017 -0400
@@ -1,102 +1,141 @@
-library(optparse)
-library(ggplot2)
+## Setup R error handling to go to stderr
+#options( show.error.messages=F,
+#       error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+warnings()
+library(RColorBrewer)
+library(lattice)
+library(latticeExtra)
+library(grid)
 library(gridExtra)
-library(RColorBrewer)
-library(gtable)
-library(grid)
+library(optparse)
  
 option_list <- list(
     make_option(c("-r", "--output_tab"), type="character", help="path to tabular file"),
-    make_option("--output_pdf", type = "character", help="path to the pdf file with plot")
+    make_option(c("-s", "--sizes"), type="character", help="path to size dataframe"),
+    make_option("--output_pdf", type = "character", help="path to the pdf file with plot"),
+    make_option("--extra_plot", type = "character", help="what additional data should be plotted")
     )
  
 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
 args = parse_args(parser)
+if (length(args$sizes) != 0) { args$extra_plot <- "SizeDistribution"}
  
-theme_set(theme_bw()) #a theme with a white background
+# dataset manipulation
+
 Table = read.delim(args$output_tab, header=T, row.names=NULL)
 Table <- within(Table, Nbr_reads[Polarity=="R"] <- (Nbr_reads[Polarity=="R"]*-1))
-Chr_limits <- unique(data.frame(Dataset=Table$Dataset, Chromosome=Table$Chromosome,
-                                Chrom_length=Table$Chrom_length))
-Chr_limits_inf <- data.frame(Coordinate=Chr_limits$Chrom_length*0,
-                             Nbr_reads=Chr_limits$Chrom_length*0,
-                             Polarity=rep("F", length(Chr_limits$Dataset)),
-                             Max=Chr_limits$Chrom_length*0,
-                             Mean=Chr_limits$Chrom_length*0,
-                             Median=Chr_limits$Chrom_length*0)
-Chr_limits_inf <- cbind(Chr_limits, Chr_limits_inf)
-Chr_limits_sup <- data.frame(Coordinate=Chr_limits$Chrom_length+1,
-                             Nbr_reads=Chr_limits$Chrom_length*0,
-                             Polarity=rep("F", length(Chr_limits$Dataset)),
-                             Max=Chr_limits$Chrom_length*0,
-                             Mean=Chr_limits$Chrom_length*0,
-                             Median=Chr_limits$Chrom_length*0)
-Chr_limits_sup <- cbind(Chr_limits, Chr_limits_sup)
-Table <- rbind(Table, Chr_limits_inf, Chr_limits_sup)
- 
-#To assign colors to categorical variables in ggplot2 that have stable mapping
-myColors <- brewer.pal(3,"Set1")
-names(myColors) <- levels(Table$Polarity)
-colScale <- scale_colour_manual(name = "Polarity",values = myColors)
- 
-#Make initial figures
+n_samples=length(unique(Table$Dataset))
+genes=unique(levels(Table$Chromosome))
+per_gene_readmap=lapply(genes, function(x) subset(Table, Chromosome==x))
+per_gene_limit=lapply(genes, function(x) c(1, unique(subset(Table, Chromosome==x)$Chrom_length)) )
+n_genes=length(per_gene_readmap)
+if (args$extra_plot == "SizeDistribution") {
+    size=read.delim(args$sizes, header=T, row.names=NULL)
+    size <- within(size, Nbr_reads[Polarity=="R"] <- (Nbr_reads[Polarity=="R"]*-1))
+    per_gene_size=lapply(genes, function(x) subset(size, Chromosome==x))
+    }
+
+## end of data frames implementation
+
+## functions
 
-p <- ggplot(Table, aes(x=Coordinate, y=Nbr_reads, colour=Polarity)) +
-  colScale+
-  geom_segment(aes(y = 0, x = Coordinate, yend = Nbr_reads, xend = Coordinate, color=Polarity)) +
-#  geom_segment(aes(y = Nbr_reads, x = 0, yend=Nbr_reads, xend=Chrom_length), alpha=0)+
-  facet_wrap(Dataset~Chromosome, scales="free", nrow=1, labeller = label_wrap_gen(multi_line = FALSE))+
-#  scale_x_continuous(limits = c(rep(0, length(Table$Chromosome)), Chr_lengths$Chrom_length)) +
-  scale_y_continuous(breaks = function(x) round(pretty(seq(-(max(x) + 1), (max(x) + 1)))))+ # to display only integer values on y axis
-  geom_hline(yintercept=0, size=0.3)+
-  theme(strip.text = element_text(size = 6, lineheight = 0.1), #specify strip size
-        panel.grid.major = element_line(colour = "#ffffff"),#conceal major grid lines
-        panel.grid.minor = element_line(colour = "#ffffff"),#conceal minor grid lines
-        axis.title = element_blank(),# Conceal axis titles
-        axis.text = element_text(size = 6),#modify the size of tick labels along axes
-        legend.position = "none") # Hide the repeate caption
+plot_readmap=function(df, ...) {
+    combineLimits(xyplot(Nbr_reads~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
+    data=df,
+    type='h',
+    lwd=1.5,
+    scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
+    xlab=NULL, main=NULL, ylab=NULL,
+    as.table=T,
+    origin = 0,
+    horizontal=FALSE,
+    group=Polarity,
+    col=c("red","blue"),
+    par.strip.text = list(cex=0.7),
+    ...))
+    }
+
+
+plot_size=function(df, ...) {
+    #smR.prepanel=function(x,y,...) {; yscale=c(y*0, max(abs(y)));list(ylim=yscale);}
+    sizeplot = xyplot(eval(as.name(args$extra_plot))~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
+    data=df,
+    type='p',
+    cex=0.35,
+    pch=19,
+    scales= list(relation="free", x=list(rot=0, cex=0, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
+    xlab=NULL, main=NULL, ylab=NULL,
+    as.table=T,
+    origin = 0,
+    horizontal=FALSE,
+    group=Polarity,
+    col=c("darkred","darkblue"),
+    par.strip.text = list(cex=0.7),
+    ...)
+    combineLimits(sizeplot)
+    }
 
-# Create legend
-mylegend <- legendGrob(c("F", "R", "Median", "Mean"), pch=22,
-                     gp=gpar(col = c("red","blue","black","yellow"), fill = c("red","blue","black","yellow")))
- 
-# The second plot
-cols<- c("Median"="#000000", "Mean"="#fffa00")
-p2 <- ggplot(Table, aes(x = Coordinate, group=1)) +
-  geom_point(aes(y=Median, colour="Median"), alpha=1, size = 1) +
-  geom_point(aes(y=Mean, colour="Mean"), alpha= 0.5, size = 1.2)+
-  scale_colour_manual(name="", values=cols)+ 
-  expand_limits(y = seq(0,max(Table$Median),by=5)) +
-  facet_wrap(Dataset~Chromosome, scales="free", nrow=1, labeller = label_wrap_gen(multi_line = FALSE))+
-#  geom_segment(aes(y = Nbr_reads, x = 0, yend=Nbr_reads, xend=Chrom_length), alpha=0)+
-  scale_y_continuous(limits = c(0,max(Table$Median)), position = "left")+
-  theme(strip.background = element_blank(),
-        strip.text.x = element_blank(),
-        panel.background = element_rect(fill = NA),
-        panel.grid.major = element_blank(),
-        panel.grid.minor = element_blank(),
-        panel.border = element_rect(fill = NA, colour = "grey50"),
-        axis.text = element_text(size = 6),
-        axis.title = element_blank(),
-        legend.position = "none")
+plot_size_distribution= function(df, ...) {
+#  smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);}
+  bc= barchart(Nbr_reads~as.factor(Size)|factor(Dataset, levels=unique(Dataset))+Chromosome, data = df, origin = 0,
+    horizontal=FALSE,
+group=Polarity,
+stack=TRUE,
+    col=c('red', 'blue'),
+    cex=0.75,
+    scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(cex=0.7) ),
+#    prepanel=smR.prepanel,
+    xlab = NULL,
+    ylab = NULL,
+    main = NULL,
+    as.table=TRUE,
+    newpage = T,
+    par.strip.text = list(cex=0.7),
+    ...)
+  combineLimits(bc)
+  }
+
+
+## end of functions
+
+## function parameters
+par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=0), strip.background = list(col=c("lightblue","lightgreen")) )
+par.settings.size=list(layout.heights=list(top.padding=0, bottom.padding=0))
+graph_title=list(Coverage="Read Maps and Coverages", Median="Read Maps and Median sizes", Mean="Read Maps and Mean sizes", SizeDistribution="Read Maps and Size Distributions")
+graph_legend=list(Coverage="Read counts / Coverage", Median="Read counts / Median size", Mean="Read counts / Mean size", SizeDistribution="Read counts")
+graph_bottom=list(Coverage="Nucleotide coordinates", Median="Nucleotide coordinates", Mean="Nucleotide coordinates", SizeDistribution="Read sizes / Nucleotide coordinates")
+## end of function parameters'
 
-# Transforme ggplot graphs on list of graphs
-plot.list1 <- by(data     = Table,
-                INDICES  = c(Table$Chromosome),
-                #simplify = TRUE,
-                FUN      = function(x) {p %+% x }
-                )
- 
-plot.list2 <- by(data     = Table,
-                INDICES  = c(Table$Chromosome),
-                simplify = TRUE,
-                FUN      = function(x) {
-                  p2 %+% x 
-                })
+## GRAPHS
+
+if (n_genes > 5) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=6} else {
+                 rows_per_page= n_genes; page_height_simple = 2.5*n_genes; page_height_combi=page_height_simple*2 }
+if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/2} # to test
+
 
-# Plotting in multiple pages with different rows
+pdf(file=args$output_pdf, paper="special", height=page_height_simple, width=page_width)
+if (rows_per_page %% 2 != 0) { rows_per_page = rows_per_page + 1}
+for (i in seq(1,n_genes,rows_per_page/2)) {
+    start=i
+    end=i+rows_per_page/2-1
+    if (end>n_genes) {end=n_genes}
+    readmap_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_readmap(x, strip=FALSE, par.settings=par.settings.readmap))
+    if (args$extra_plot == "SizeDistribution") {
+        size_plot.list=lapply(per_gene_size[start:end], function(x) plot_size_distribution(x, par.settings=par.settings.size))
+        }
+    else {
+        size_plot.list=lapply(per_gene_readmap[start:end], function(x) plot_size(x, par.settings=par.settings.size))
+        }
+    
+        
+    plot.list=rbind(size_plot.list, readmap_plot.list)
+    args_list=c(plot.list, list(nrow=rows_per_page+1, ncol=1,
+                                    top=textGrob(graph_title[[args$extra_plot]], gp=gpar(cex=1), just="top"),
+                                    left=textGrob(graph_legend[[args$extra_plot]], gp=gpar(cex=1), vjust=1, rot=90),
+                                    sub=textGrob(graph_bottom[[args$extra_plot]], gp=gpar(cex=1), just="bottom")
+                                    )
+           )
+do.call(grid.arrange, args_list)
+}
+devname=dev.off()
 
-grobs=rbind(plot.list1,plot.list2)
-multi.plot<-do.call(marrangeGrob,list(grobs,ncol=1,nrow=8,top=NULL, 
-            bottom="Coordinates(nt)", left="Number of reads / Median & Mean", right= mylegend))
-ggsave(args$output_pdf, device="pdf", plot=multi.plot, height=11.69, width=8.2)
\ No newline at end of file