Mercurial > repos > artbio > small_rna_maps
diff small_rna_maps.r @ 1:615fa2171a34 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/small_rna_maps commit c3d728b98db4987821feae40952d9797c97eaf5a"
| author | artbio |
|---|---|
| date | Fri, 04 Oct 2019 04:33:08 -0400 |
| parents | 0a06985c0894 |
| children | 59d93aa7cc20 |
line wrap: on
line diff
--- a/small_rna_maps.r Tue Aug 22 12:06:58 2017 -0400 +++ b/small_rna_maps.r Fri Oct 04 04:33:08 2019 -0400 @@ -1,7 +1,7 @@ ## Setup R error handling to go to stderr options( show.error.messages=F, - error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) -warnings() + error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +options(warn = -1) library(RColorBrewer) library(lattice) library(latticeExtra) @@ -9,133 +9,246 @@ library(gridExtra) library(optparse) + option_list <- list( - make_option(c("-f", "--first_dataframe"), type="character", help="path to first dataframe"), - make_option(c("-e", "--extra_dataframe"), type="character", help="path to additional dataframe"), - make_option("--extra_plot_method", type = "character", help="How additional data should be plotted"), - make_option("--output_pdf", type = "character", help="path to the pdf file with plots") - ) - + make_option(c("-i", "--ymin"), type="double", help="set min ylimit. e.g. '-100.0'"), + make_option(c("-a", "--ymax"), type="double", help="set max ylimit. e.g. '100.0'"), + make_option(c("-f", "--first_dataframe"), type="character", help="path to first dataframe"), + make_option(c("-e", "--extra_dataframe"), type="character", help="path to additional dataframe"), + make_option(c("-n", "--normalization"), type="character", help="space-separated normalization/size factors"), + make_option("--first_plot_method", type = "character", help="How additional data should be plotted"), + make_option("--extra_plot_method", type = "character", help="How additional data should be plotted"), + make_option("--global", type = "character", help="data should be plotted as global size distribution"), + make_option("--output_pdf", type = "character", help="path to the pdf file with plots") +) + parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) args = parse_args(parser) - + # data frames implementation - +## first table Table = read.delim(args$first_dataframe, header=T, row.names=NULL) -Table <- within(Table, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1)) +colnames(Table)[1] <- "Dataset" +if (args$first_plot_method == "Counts" | args$first_plot_method == "Size") { + Table <- within(Table, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1)) +} n_samples=length(unique(Table$Dataset)) -genes=unique(levels(Table$Chromosome)) +samples = unique(Table$Dataset) +if (args$normalization != "") { + norm_factors = as.numeric(unlist(strsplit(args$normalization, " "))) +} else { + norm_factors = rep(1, n_samples) +} +if (args$first_plot_method == "Counts" | args$first_plot_method == "Size" | args$first_plot_method == "Coverage") { + i = 1 + for (sample in samples) { + Table[, length(Table)][Table$Dataset==sample] <- Table[, length(Table)][Table$Dataset==sample]*norm_factors[i] + i = i + 1 + } +} +genes=unique(Table$Chromosome) per_gene_readmap=lapply(genes, function(x) subset(Table, Chromosome==x)) per_gene_limit=lapply(genes, function(x) c(1, unique(subset(Table, Chromosome==x)$Chrom_length)) ) n_genes=length(per_gene_readmap) - -ExtraTable=read.delim(args$extra_dataframe, header=T, row.names=NULL) -if (args$extra_plot_method=='Size') { - ExtraTable <- within(ExtraTable, Count[Polarity=="R"] <- (Count[Polarity=="R"]*-1)) +# second table +if (args$extra_plot_method != '') { + ExtraTable=read.delim(args$extra_dataframe, header=T, row.names=NULL) + colnames(ExtraTable)[1] <- "Dataset" + if (args$extra_plot_method == "Counts" | args$extra_plot_method=='Size') { + ExtraTable <- within(ExtraTable, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1)) + } + if (args$extra_plot_method == "Counts" | args$extra_plot_method == "Size" | args$extra_plot_method == "Coverage") { + i = 1 + for (sample in samples) { + ExtraTable[, length(ExtraTable)][ExtraTable$Dataset==sample] <- ExtraTable[, length(ExtraTable)][ExtraTable$Dataset==sample]*norm_factors[i] + i = i + 1 } -per_gene_size=lapply(genes, function(x) subset(ExtraTable, Chromosome==x)) - -## end of data frames implementation + } + per_gene_size=lapply(genes, function(x) subset(ExtraTable, Chromosome==x)) +} ## functions - -first_plot = function(df, ...) { - combineLimits(xyplot(Counts~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)), - data=df, - type='h', - lwd=1.5, - scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)), - xlab=NULL, main=NULL, ylab=NULL, - as.table=T, - origin = 0, - horizontal=FALSE, - group=Polarity, - col=c("red","blue"), - par.strip.text = list(cex=0.7), - ...)) - } +globalbc = function(df, global="", ...) { + if (global == "yes") { + bc <- barchart(Counts~as.factor(Size)|factor(Dataset, levels=unique(Dataset)), + data = df, origin = 0, + horizontal=FALSE, + col=c("darkblue"), + scales=list(y=list(tick.number=4, rot=90, relation="same", cex=0.5, alternating=T), x=list(rot=0, cex=0.6, tck=0.5, alternating=c(3,3))), + xlab=list(label=bottom_first_method[[args$first_plot_method]], cex=.85), + ylab=list(label=legend_first_method[[args$first_plot_method]], cex=.85), + main=title_first_method[[args$first_plot_method]], + layout = c(2, 6), newpage=T, + as.table=TRUE, + aspect=0.5, + strip = strip.custom(par.strip.text = list(cex = 1), which.given=1, bg="lightblue"), + ... + ) + } else { + bc <- barchart(Counts~as.factor(Size)|factor(Dataset, levels=unique(Dataset)), + data = df, origin = 0, + horizontal=FALSE, + group=Polarity, + stack=TRUE, + col=c('red', 'blue'), + scales=list(y=list(tick.number=4, rot=90, relation="same", cex=0.5, alternating=T), x=list(rot=0, cex=0.6, tck=0.5, alternating=c(3,3))), + xlab=list(label=bottom_first_method[[args$first_plot_method]], cex=.85), + ylab=list(label=legend_first_method[[args$first_plot_method]], cex=.85), + main=title_first_method[[args$first_plot_method]], + layout = c(2, 6), newpage=T, + as.table=TRUE, + aspect=0.5, + strip = strip.custom(par.strip.text = list(cex = 1), which.given=1, bg="lightblue"), + ... + ) + } + return(bc) +} +plot_unit = function(df, method=args$first_plot_method, ...) { + if (exists('ymin', where=args)){ + min=args$ymin + }else{ + min='' + } + if ((exists('ymax', where=args))){ + max=args$ymax + }else{ + max='' + } + ylimits=c(min,max) + if (method == 'Counts') { + p = xyplot(Counts~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)), + data=df, + type='h', + lwd=1.5, + scales= list(relation="free", x=list(rot=0, cex=0.7, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)), + xlab=NULL, main=NULL, ylab=NULL, ylim=ylimits, + as.table=T, + origin = 0, + horizontal=FALSE, + group=Polarity, + col=c("red","blue"), + par.strip.text = list(cex=0.7), + ...) + p=combineLimits(p) + } else if (method != "Size") { + p = xyplot(eval(as.name(method))~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)), + data=df, + type='p', + pch=19, + cex=0.35, + scales= list(relation="free", x=list(rot=0, cex=0.7, tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)), + xlab=NULL, main=NULL, ylab=NULL, ylim=ylimits, + as.table=T, + origin = 0, + horizontal=FALSE, + group=Polarity, + col=c("red","blue"), + par.strip.text = list(cex=0.7), + ...) + } else { + p = barchart(Counts~as.factor(Size)|factor(Dataset, levels=unique(Dataset))+Chromosome, data = df, origin = 0, + horizontal=FALSE, + group=Polarity, + stack=TRUE, + col=c('red', 'blue'), + scales=list(y=list(rot=90, relation="free", cex=0.7), x=list(rot=0, cex=0.7, axs="i", tck=c(1,0))), + xlab = NULL, + ylab = NULL, + main = NULL, + as.table=TRUE, + par.strip.text = list(cex=0.6), + ...) + p=combineLimits(p) + } + return(p) +} -second_plot = function(df, ...) { - #smR.prepanel=function(x,y,...) {; yscale=c(y*0, max(abs(y)));list(ylim=yscale);} - sizeplot = xyplot(eval(as.name(args$extra_plot_method))~Coordinate|factor(Dataset, levels=unique(Dataset))+factor(Chromosome, levels=unique(Chromosome)), - data=df, - type='p', - cex=0.35, - pch=19, - scales= list(relation="free", x=list(rot=0, cex=0, axs="i", tck=0.5), y=list(tick.number=4, rot=90, cex=0.7)), - xlab=NULL, main=NULL, ylab=NULL, - as.table=T, - origin = 0, - horizontal=FALSE, - group=Polarity, - col=c("darkred","darkblue"), - par.strip.text = list(cex=0.7), - ...) - combineLimits(sizeplot) - } +## function parameters -second_plot_size = function(df, ...) { -# smR.prepanel=function(x,y,...){; yscale=c(-max(abs(y)), max(abs(y)));list(ylim=yscale);} - bc= barchart(Count~as.factor(Size)|factor(Dataset, levels=unique(Dataset))+Chromosome, data = df, origin = 0, - horizontal=FALSE, -group=Polarity, -stack=TRUE, - col=c('red', 'blue'), - cex=0.75, - scales=list(y=list(tick.number=4, rot=90, relation="free", cex=0.7), x=list(cex=0.7) ), -# prepanel=smR.prepanel, - xlab = NULL, - ylab = NULL, - main = NULL, - as.table=TRUE, - newpage = T, - par.strip.text = list(cex=0.7), - ...) - combineLimits(bc) - } - +#par.settings.firstplot = list(layout.heights=list(top.padding=11, bottom.padding = -14)) +#par.settings.secondplot=list(layout.heights=list(top.padding=11, bottom.padding = -15), strip.background=list(col=c("lavender","deepskyblue"))) +par.settings.firstplot = list(layout.heights=list(top.padding=-2, bottom.padding=-2),strip.background=list(col=c("lightblue","lightgreen"))) +par.settings.secondplot=list(layout.heights=list(top.padding=-1, bottom.padding=-1),strip.background=list(col=c("lightblue","lightgreen"))) +title_first_method = list(Counts="Read Counts", Coverage="Coverage depths", Median="Median sizes", Mean="Mean sizes", Size="Size Distributions") +title_extra_method = list(Counts="Read Counts", Coverage="Coverage depths", Median="Median sizes", Mean="Mean sizes", Size="Size Distributions") +legend_first_method =list(Counts="Read count", Coverage="Coverage depth", Median="Median size", Mean="Mean size", Size="Read count") +legend_extra_method =list(Counts="Read count", Coverage="Coverage depth", Median="Median size", Mean="Mean size", Size="Read count") +bottom_first_method =list(Counts="Coordinates (nucleotides)",Coverage="Coordinates (nucleotides)", Median="Coordinates (nucleotides)", Mean="Coordinates (nucleotides)", Size="Sizes of reads") +bottom_extra_method =list(Counts="Coordinates (nucleotides)",Coverage="Coordinates (nucleotides)", Median="Coordinates (nucleotides)", Mean="Coordinates (nucleotides)", Size="Sizes of reads") -## end of functions - -## function parameters -par.settings.readmap=list(layout.heights=list(top.padding=0, bottom.padding=0), strip.background = list(col=c("lightblue","lightgreen")) ) -par.settings.size=list(layout.heights=list(top.padding=0, bottom.padding=0)) -graph_title=list(Coverage="Read Maps and Coverages", Median="Read Maps and Median sizes", Mean="Read Maps and Mean sizes", SizeDistribution="Read Maps and Size Distributions") -graph_legend=list(Coverage="Read counts / Coverage", Median="Read counts / Median size", Mean="Read counts / Mean size", SizeDistribution="Read counts") -graph_bottom=list(Coverage="Nucleotide coordinates", Median="Nucleotide coordinates", Mean="Nucleotide coordinates", Size="Read sizes / Nucleotide coordinates") -## end of function parameters' +## Plotting Functions -## GRAPHS - -if (n_genes > 5) {page_height_simple = 11.69; page_height_combi=11.69; rows_per_page=6} else { - rows_per_page= n_genes; page_height_simple = 2.5*n_genes; page_height_combi=page_height_simple*2 } -if (n_samples > 4) {page_width = 8.2677*n_samples/4} else {page_width = 8.2677*n_samples/2} # to test - - -pdf(file=args$output_pdf, paper="special", height=page_height_simple, width=page_width) -if (rows_per_page %% 2 != 0) { rows_per_page = rows_per_page + 1} -for (i in seq(1,n_genes,rows_per_page/2)) { +double_plot <- function(...) { + page_height = 15 + rows_per_page = 10 + graph_heights=c(40,30,40,30,40,30,40,30,40,30,10) + page_width=8.2677 * n_samples / 2 + pdf(file=args$output_pdf, paper="special", height=page_height, width=page_width) + for (i in seq(1,n_genes,rows_per_page/2)) { start=i end=i+rows_per_page/2-1 if (end>n_genes) {end=n_genes} - first_plot.list=lapply(per_gene_readmap[start:end], function(x) first_plot(x, strip=FALSE, par.settings=par.settings.readmap)) - if (args$extra_plot_method == "Size") { - second_plot.list=lapply(per_gene_size[start:end], function(x) second_plot_size(x, par.settings=par.settings.size)) - } - else { - second_plot.list=lapply(per_gene_size[start:end], function(x) second_plot(x, par.settings=par.settings.size)) - } - - - plot.list=rbind(second_plot.list, first_plot.list) - args_list=c(plot.list, list(nrow=rows_per_page+1, ncol=1, - top=textGrob(graph_title[[args$extra_plot_method]], gp=gpar(cex=1), just="top"), - left=textGrob(graph_legend[[args$extra_plot_method]], gp=gpar(cex=1), vjust=1, rot=90), - sub=textGrob(graph_bottom[[args$extra_plot_method]], gp=gpar(cex=1), just="bottom") - ) - ) -do.call(grid.arrange, args_list) + if (end-start+1 < 5) {graph_heights=c(rep(c(40,30),end-start+1),10,rep(c(40,30),5-(end-start+1)))} + first_plot.list = lapply(per_gene_readmap[start:end], function(x) update(useOuterStrips(plot_unit(x, par.settings=par.settings.secondplot), strip.left=strip.custom(par.strip.text = list(cex=0.5))))) + second_plot.list = lapply(per_gene_size[start:end], function(x) update(useOuterStrips(plot_unit(x, method=args$extra_plot_method, par.settings=par.settings.firstplot), strip.left=strip.custom(par.strip.text = list(cex=0.5)), strip=FALSE))) + plot.list=rbind(first_plot.list, second_plot.list) + args_list=c(plot.list, list( nrow=rows_per_page+1, ncol=1, heights=unit(graph_heights, rep("mm", 11)), + top=textGrob(paste(title_first_method[[args$first_plot_method]], "and", title_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), vjust=0, just="top"), + left=textGrob(paste(legend_first_method[[args$first_plot_method]], "/", legend_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), vjust=0, hjust=0, x=1, y=(-0.38/4)*(end-start-(3.28/0.38)), rot=90), + sub=textGrob(paste(bottom_first_method[[args$first_plot_method]], "/", bottom_extra_method[[args$extra_plot_method]]), gp=gpar(cex=1), just="bottom", vjust=2) + ) + ) + do.call(grid.arrange, args_list) + } + devname=dev.off() } -devname=dev.off() + +single_plot <- function(...) { + width = 8.2677 * n_samples / 2 + rows_per_page=8 + graph_heights=c(rep(40,8),10) + pdf(file=args$output_pdf, paper="special", height=15, width=width) + for (i in seq(1,n_genes,rows_per_page)) { + start=i + end=i+rows_per_page-1 + if (end>n_genes) {end=n_genes} + if (end-start+1 < 8) {graph_heights=c(rep(c(40),end-start+1),10,rep(c(40),8-(end-start+1)))} + first_plot.list = lapply(per_gene_readmap[start:end], function(x) update(useOuterStrips(plot_unit(x, par.settings=par.settings.firstplot),strip.left=strip.custom(par.strip.text = list(cex=0.5))))) + plot.list=rbind(first_plot.list) + args_list=c(plot.list, list( nrow=rows_per_page+1, ncol=1, heights=unit(graph_heights, rep("mm", 9)), + top=textGrob(title_first_method[[args$first_plot_method]], gp=gpar(cex=1), vjust=0, just="top"), + left=textGrob(legend_first_method[[args$first_plot_method]], gp=gpar(cex=1), vjust=0, hjust=0, x=1, y=(-0.41/7)*(end-start-(6.23/0.41)), rot=90), + sub=textGrob(bottom_first_method[[args$first_plot_method]], gp=gpar(cex=1), just="bottom", vjust=2) + ) + ) + do.call(grid.arrange, args_list) + } + devname=dev.off() +} + +# main + +if (args$extra_plot_method != '') { double_plot() } +if (args$extra_plot_method == '' & !exists('global', where=args)) { + single_plot() +} +if (exists('global', where=args)) { + pdf(file=args$output, paper="special", height=11.69) + Table <- within(Table, Counts[Polarity=="R"] <- abs(Counts[Polarity=="R"])) # retropedalage + library(reshape2) + ml = melt(Table, id.vars = c("Dataset", "Chromosome", "Polarity", "Size")) + if (args$global == "nomerge") { + castml = dcast(ml, Dataset+Polarity+Size ~ variable, function(x) sum(x)) + castml <- within(castml, Counts[Polarity=="R"] <- (Counts[Polarity=="R"]*-1)) + bc = globalbc(castml, global="no") + } else { + castml = dcast(ml, Dataset+Size ~ variable, function(x) sum(x)) + bc = globalbc(castml, global="yes") + } + plot(bc) + devname=dev.off() +} +
