diff small_rna_maps.r @ 1:615fa2171a34 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit c3d728b98db4987821feae40952d9797c97eaf5a"
author artbio
date Fri, 04 Oct 2019 04:33:08 -0400
parents 0a06985c0894
children 59d93aa7cc20
line wrap: on
line diff
--- a/small_rna_maps.r	Tue Aug 22 12:06:58 2017 -0400
+++ b/small_rna_maps.r	Fri Oct 04 04:33:08 2019 -0400
@@ -1,7 +1,7 @@
 ## Setup R error handling to go to stderr
 options( show.error.messages=F,
-       error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
-warnings()
+         error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } )
+options(warn = -1)
 library(RColorBrewer)
 library(lattice)
 library(latticeExtra)
@@ -9,133 +9,246 @@
 library(gridExtra)
 library(optparse)
 
+
 option_list <- list(
-    make_option(c("-f", "--first_dataframe"), type="character", help="path to first dataframe"),
-    make_option(c("-e", "--extra_dataframe"), type="character", help="path to additional dataframe"),
-    make_option("--extra_plot_method", type = "character", help="How additional data should be plotted"),
-    make_option("--output_pdf", type = "character", help="path to the pdf file with plots")
-    )
- 
+  make_option(c("-i", "--ymin"), type="double", help="set min ylimit. e.g. '-100.0'"),
+  make_option(c("-a", "--ymax"), type="double", help="set max ylimit. e.g. '100.0'"),
+  make_option(c("-f", "--first_dataframe"), type="character", help="path to first dataframe"),
+  make_option(c("-e", "--extra_dataframe"), type="character", help="path to additional dataframe"),
+  make_option(c("-n", "--normalization"), type="character", help="space-separated normalization/size factors"),
+  make_option("--first_plot_method", type = "character", help="How additional data should be plotted"),
+  make_option("--extra_plot_method", type = "character", help="How additional data should be plotted"),
+  make_option("--global", type = "character", help="data should be plotted as global size distribution"),
+  make_option("--output_pdf", type = "character", help="path to the pdf file with plots")
+)
+
 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
 args = parse_args(parser)
- 
+
 # data frames implementation
-
+## first table
 Table = read.delim(args$first_dataframe, header=T, row.names=NULL)
-Table <- within(Table, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1))
+colnames(Table)[1] <- "Dataset"
+if (args$first_plot_method == "Counts" | args$first_plot_method == "Size") {
+  Table <- within(Table, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1))
+}
 n_samples=length(unique(Table$Dataset))
-genes=unique(levels(Table$Chromosome))
+samples = unique(Table$Dataset)
+if (args$normalization != "") {
+  norm_factors = as.numeric(unlist(strsplit(args$normalization, " ")))
+} else {
+  norm_factors = rep(1, n_samples)
+}
+if (args$first_plot_method == "Counts" | args$first_plot_method == "Size" | args$first_plot_method == "Coverage") {
+  i = 1
+  for (sample in samples) {
+    Table[, length(Table)][Table$Dataset==sample] <- Table[, length(Table)][Table$Dataset==sample]*norm_factors[i]
+    i = i + 1
+  }
+}
+genes=unique(Table$Chromosome)
 per_gene_readmap=lapply(genes, function(x) subset(Table, Chromosome==x))
 per_gene_limit=lapply(genes, function(x) c(1, unique(subset(Table, Chromosome==x)$Chrom_length)) )
 n_genes=length(per_gene_readmap)
-
-ExtraTable=read.delim(args$extra_dataframe, header=T, row.names=NULL)
-if (args$extra_plot_method=='Size') {
-    ExtraTable <- within(ExtraTable, Count[Polarity=="R"] <- (Count[Polarity=="R"]*-1))
+# second table
+if (args$extra_plot_method != '') {
+  ExtraTable=read.delim(args$extra_dataframe, header=T, row.names=NULL)
+  colnames(ExtraTable)[1] <- "Dataset"
+  if (args$extra_plot_method == "Counts" | args$extra_plot_method=='Size') {
+    ExtraTable <- within(ExtraTable, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1))
+  }
+  if (args$extra_plot_method == "Counts" | args$extra_plot_method == "Size" | args$extra_plot_method == "Coverage") {
+    i = 1
+    for (sample in samples) {
+      ExtraTable[, length(ExtraTable)][ExtraTable$Dataset==sample] <- ExtraTable[, length(ExtraTable)][ExtraTable$Dataset==sample]*norm_factors[i]
+      i = i + 1
     }
-per_gene_size=lapply(genes, function(x) subset(ExtraTable, Chromosome==x))
-    
-## end of data frames implementation
+  }
+  per_gene_size=lapply(genes, function(x) subset(ExtraTable, Chromosome==x))
+}
 
 ## functions
-
-first_plot = function(df, ...) {
-    combineLimits(xyplot(Counts~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
-    data=df,
-    type='h',
-    lwd=1.5,
-    scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
-    xlab=NULL, main=NULL, ylab=NULL,
-    as.table=T,
-    origin = 0,
-    horizontal=FALSE,
-    group=Polarity,
-    col=c("red","blue"),
-    par.strip.text = list(cex=0.7),
-    ...))
-    }
+globalbc = function(df, global="", ...) {
+  if (global == "yes") {
+    bc <- barchart(Counts~as.factor(Size)|factor(Dataset, levels=unique(Dataset)),
+                   data = df, origin = 0,
+                   horizontal=FALSE,
+                   col=c("darkblue"),
+                   scales=list(y=list(tick.number=4, rot=90, relation="same", cex=0.5, alternating=T), x=list(rot=0, cex=0.6, tck=0.5, alternating=c(3,3))),
+                   xlab=list(label=bottom_first_method[[args$first_plot_method]], cex=.85),
+                   ylab=list(label=legend_first_method[[args$first_plot_method]], cex=.85),
+                   main=title_first_method[[args$first_plot_method]],
+                   layout = c(2, 6), newpage=T,
+                   as.table=TRUE,
+                   aspect=0.5,
+                   strip = strip.custom(par.strip.text = list(cex = 1), which.given=1, bg="lightblue"),
+                   ...
+    )
+  } else {
+    bc <- barchart(Counts~as.factor(Size)|factor(Dataset, levels=unique(Dataset)),
+                   data = df, origin = 0,
+                   horizontal=FALSE,
+                   group=Polarity,
+                   stack=TRUE,
+                   col=c('red', 'blue'),
+                   scales=list(y=list(tick.number=4, rot=90, relation="same", cex=0.5, alternating=T), x=list(rot=0, cex=0.6, tck=0.5, alternating=c(3,3))),
+                   xlab=list(label=bottom_first_method[[args$first_plot_method]], cex=.85),
+                   ylab=list(label=legend_first_method[[args$first_plot_method]], cex=.85),
+                   main=title_first_method[[args$first_plot_method]],
+                   layout = c(2, 6), newpage=T,
+                   as.table=TRUE,
+                   aspect=0.5,
+                   strip = strip.custom(par.strip.text = list(cex = 1), which.given=1, bg="lightblue"),
+                   ...
+    )
+  }
+  return(bc)
+}
+plot_unit = function(df, method=args$first_plot_method, ...) {
+  if (exists('ymin', where=args)){
+    min=args$ymin
+  }else{
+    min=''
+  }
+  if ((exists('ymax', where=args))){
+    max=args$ymax
+  }else{
+    max=''
+  }
+  ylimits=c(min,max)
+  if (method == 'Counts') {
+    p = xyplot(Counts~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
+               data=df,
+               type='h',
+               lwd=1.5,
+               scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
+               xlab=NULL, main=NULL, ylab=NULL, ylim=ylimits,
+               as.table=T,
+               origin = 0,
+               horizontal=FALSE,
+               group=Polarity,
+               col=c("red","blue"),
+               par.strip.text = list(cex=0.7),
+               ...)
+    p=combineLimits(p)
+  } else if (method != "Size") {
+    p = xyplot(eval(as.name(method))~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
+               data=df,
+               type='p',
+               pch=19,
+               cex=0.35,
+               scales= list(relation="free", x=list(rot=0, cex=0.7, tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
+               xlab=NULL, main=NULL, ylab=NULL, ylim=ylimits,
+               as.table=T,
+               origin = 0,
+               horizontal=FALSE,
+               group=Polarity,
+               col=c("red","blue"),
+               par.strip.text = list(cex=0.7),
+               ...)
+  } else {
+    p = barchart(Counts~as.factor(Size)|factor(Dataset, levels=unique(Dataset))+Chromosome, data = df, origin = 0,
+                 horizontal=FALSE,
+                 group=Polarity,
+                 stack=TRUE,
+                 col=c('red', 'blue'),
+                 scales=list(y=list(rot=90, relation="free", cex=0.7), x=list(rot=0, cex=0.7, axs="i", tck=c(1,0))),
+                 xlab = NULL,
+                 ylab = NULL,
+                 main = NULL,
+                 as.table=TRUE,
+                 par.strip.text = list(cex=0.6),
+                 ...)
+    p=combineLimits(p)
+  }
+  return(p)
+}
 
 
-second_plot = function(df, ...) {
-    #smR.prepanel=function(x,y,...) {; yscale=c(y*0, max(abs(y)));list(ylim=yscale);}
-    sizeplot = xyplot(eval(as.name(args$extra_plot_method))~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)),
-    data=df,
-    type='p',
-    cex=0.35,
-    pch=19,
-    scales= list(relation="free", x=list(rot=0, cex=0, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)),
-    xlab=NULL, main=NULL, ylab=NULL,
-    as.table=T,
-    origin = 0,
-    horizontal=FALSE,
-    group=Polarity,
-    col=c("darkred","darkblue"),
-    par.strip.text = list(cex=0.7),
-    ...)
-    combineLimits(sizeplot)
-    }
+## function parameters
 
-second_plot_size = function(df, ...) {
-#  smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);}
-  bc= barchart(Count~as.factor(Size)|factor(Dataset, levels=unique(Dataset))+Chromosome, data = df, origin = 0,
-    horizontal=FALSE,
-group=Polarity,
-stack=TRUE,
-    col=c('red', 'blue'),
-    cex=0.75,
-    scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(cex=0.7) ),
-#    prepanel=smR.prepanel,
-    xlab = NULL,
-    ylab = NULL,
-    main = NULL,
-    as.table=TRUE,
-    newpage = T,
-    par.strip.text = list(cex=0.7),
-    ...)
-  combineLimits(bc)
-  }
-
+#par.settings.firstplot = list(layout.heights=list(top.padding=11, bottom.padding = -14))
+#par.settings.secondplot=list(layout.heights=list(top.padding=11, bottom.padding = -15), strip.background=list(col=c("lavender","deepskyblue")))
+par.settings.firstplot = list(layout.heights=list(top.padding=-2, bottom.padding=-2),strip.background=list(col=c("lightblue","lightgreen")))
+par.settings.secondplot=list(layout.heights=list(top.padding=-1, bottom.padding=-1),strip.background=list(col=c("lightblue","lightgreen")))
+title_first_method = list(Counts="Read Counts", Coverage="Coverage depths", Median="Median sizes", Mean="Mean sizes", Size="Size Distributions")
+title_extra_method = list(Counts="Read Counts", Coverage="Coverage depths", Median="Median sizes", Mean="Mean sizes", Size="Size Distributions")
+legend_first_method =list(Counts="Read count", Coverage="Coverage depth", Median="Median size", Mean="Mean size", Size="Read count")
+legend_extra_method =list(Counts="Read count", Coverage="Coverage depth", Median="Median size", Mean="Mean size", Size="Read count")
+bottom_first_method =list(Counts="Coordinates (nucleotides)",Coverage="Coordinates (nucleotides)", Median="Coordinates (nucleotides)", Mean="Coordinates (nucleotides)", Size="Sizes of reads")
+bottom_extra_method =list(Counts="Coordinates (nucleotides)",Coverage="Coordinates (nucleotides)", Median="Coordinates (nucleotides)", Mean="Coordinates (nucleotides)", Size="Sizes of reads")
 
-## end of functions
-
-## function parameters
-par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=0), strip.background = list(col=c("lightblue","lightgreen")) )
-par.settings.size=list(layout.heights=list(top.padding=0, bottom.padding=0))
-graph_title=list(Coverage="Read Maps and Coverages", Median="Read Maps and Median sizes", Mean="Read Maps and Mean sizes", SizeDistribution="Read Maps and Size Distributions")
-graph_legend=list(Coverage="Read counts / Coverage", Median="Read counts / Median size", Mean="Read counts / Mean size", SizeDistribution="Read counts")
-graph_bottom=list(Coverage="Nucleotide coordinates", Median="Nucleotide coordinates", Mean="Nucleotide coordinates", Size="Read sizes / Nucleotide coordinates")
-## end of function parameters'
+## Plotting Functions
 
-## GRAPHS
-
-if (n_genes > 5) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=6} else {
-                 rows_per_page= n_genes; page_height_simple = 2.5*n_genes; page_height_combi=page_height_simple*2 }
-if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/2} # to test
-
-
-pdf(file=args$output_pdf, paper="special", height=page_height_simple, width=page_width)
-if (rows_per_page %% 2 != 0) { rows_per_page = rows_per_page + 1}
-for (i in seq(1,n_genes,rows_per_page/2)) {
+double_plot <- function(...) {
+  page_height = 15
+  rows_per_page = 10
+  graph_heights=c(40,30,40,30,40,30,40,30,40,30,10)
+  page_width=8.2677 * n_samples / 2
+  pdf(file=args$output_pdf, paper="special", height=page_height, width=page_width)
+  for (i in seq(1,n_genes,rows_per_page/2)) {
     start=i
     end=i+rows_per_page/2-1
     if (end>n_genes) {end=n_genes}
-    first_plot.list=lapply(per_gene_readmap[start:end], function(x) first_plot(x, strip=FALSE, par.settings=par.settings.readmap))
-    if (args$extra_plot_method == "Size") {
-        second_plot.list=lapply(per_gene_size[start:end], function(x) second_plot_size(x, par.settings=par.settings.size))
-        }
-    else {
-        second_plot.list=lapply(per_gene_size[start:end], function(x) second_plot(x, par.settings=par.settings.size))
-        }
-    
-        
-    plot.list=rbind(second_plot.list, first_plot.list)
-    args_list=c(plot.list, list(nrow=rows_per_page+1, ncol=1,
-                                    top=textGrob(graph_title[[args$extra_plot_method]], gp=gpar(cex=1), just="top"),
-                                    left=textGrob(graph_legend[[args$extra_plot_method]], gp=gpar(cex=1), vjust=1, rot=90),
-                                    sub=textGrob(graph_bottom[[args$extra_plot_method]], gp=gpar(cex=1), just="bottom")
-                                    )
-           )
-do.call(grid.arrange, args_list)
+    if (end-start+1 < 5) {graph_heights=c(rep(c(40,30),end-start+1),10,rep(c(40,30),5-(end-start+1)))}
+    first_plot.list = lapply(per_gene_readmap[start:end], function(x) update(useOuterStrips(plot_unit(x, par.settings=par.settings.secondplot), strip.left=strip.custom(par.strip.text = list(cex=0.5)))))
+    second_plot.list = lapply(per_gene_size[start:end], function(x) update(useOuterStrips(plot_unit(x, method=args$extra_plot_method, par.settings=par.settings.firstplot), strip.left=strip.custom(par.strip.text = list(cex=0.5)), strip=FALSE)))
+    plot.list=rbind(first_plot.list, second_plot.list)
+    args_list=c(plot.list, list( nrow=rows_per_page+1, ncol=1, heights=unit(graph_heights, rep("mm", 11)),
+                                 top=textGrob(paste(title_first_method[[args$first_plot_method]], "and", title_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), vjust=0, just="top"),
+                                 left=textGrob(paste(legend_first_method[[args$first_plot_method]], "/", legend_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), vjust=0, hjust=0, x=1, y=(-0.38/4)*(end-start-(3.28/0.38)), rot=90),
+                                 sub=textGrob(paste(bottom_first_method[[args$first_plot_method]], "/", bottom_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), just="bottom", vjust=2)
+    )
+    )
+    do.call(grid.arrange, args_list)
+  }
+  devname=dev.off()
 }
-devname=dev.off()
+
 
+single_plot <- function(...) {
+  width = 8.2677 * n_samples / 2
+  rows_per_page=8
+  graph_heights=c(rep(40,8),10)
+  pdf(file=args$output_pdf, paper="special", height=15, width=width)
+  for (i in seq(1,n_genes,rows_per_page)) {
+    start=i
+    end=i+rows_per_page-1
+    if (end>n_genes) {end=n_genes}
+    if (end-start+1 < 8) {graph_heights=c(rep(c(40),end-start+1),10,rep(c(40),8-(end-start+1)))}
+    first_plot.list = lapply(per_gene_readmap[start:end], function(x) update(useOuterStrips(plot_unit(x, par.settings=par.settings.firstplot),strip.left=strip.custom(par.strip.text = list(cex=0.5)))))
+    plot.list=rbind(first_plot.list)
+    args_list=c(plot.list, list( nrow=rows_per_page+1, ncol=1, heights=unit(graph_heights, rep("mm", 9)),
+                                 top=textGrob(title_first_method[[args$first_plot_method]], gp=gpar(cex=1), vjust=0, just="top"),
+                                 left=textGrob(legend_first_method[[args$first_plot_method]], gp=gpar(cex=1), vjust=0, hjust=0, x=1, y=(-0.41/7)*(end-start-(6.23/0.41)), rot=90),
+                                 sub=textGrob(bottom_first_method[[args$first_plot_method]], gp=gpar(cex=1), just="bottom", vjust=2)
+    )
+    )
+    do.call(grid.arrange, args_list)
+  }
+  devname=dev.off()
+}
+
+# main
+
+if (args$extra_plot_method != '') { double_plot() }
+if (args$extra_plot_method == '' & !exists('global', where=args)) {
+  single_plot()
+}
+if (exists('global', where=args)) {
+  pdf(file=args$output, paper="special", height=11.69)
+  Table <- within(Table, Counts[Polarity=="R"] <- abs(Counts[Polarity=="R"])) # retropedalage
+  library(reshape2)
+  ml = melt(Table, id.vars = c("Dataset", "Chromosome", "Polarity", "Size"))
+  if (args$global == "nomerge") {
+    castml = dcast(ml, Dataset+Polarity+Size ~ variable, function(x) sum(x))
+    castml <- within(castml, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1))
+    bc = globalbc(castml, global="no")
+  } else {
+    castml = dcast(ml, Dataset+Size ~ variable, function(x) sum(x))
+    bc = globalbc(castml, global="yes")
+  }
+  plot(bc)
+  devname=dev.off()
+}
+