view scripts/contamination_plot.R @ 1:78341ccbad0a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crosscontamination_barcode_filter commit f967afe562781e5c8ed4e24e9d1e0bc3ebb29401
author iuc
date Mon, 03 Jun 2019 14:54:55 -0400
parents fd938aa845d0
children e20001675838
line wrap: on
line source

#!/usr/bin/env R

suppressPackageStartupMessages(require(ggplot2))

log10histoPlot <- function(title="", columncounts){
    #' Log10 histogram plot
    #'
    #' @param columncounts colSums(input_matrix)
    #' @param title Title of plot
    #' @return ggplot grob
    dfer <- data.frame(colcounts=log10(columncounts))
    p1 <- ggplot(dfer, aes(x=colcounts)) +
        geom_histogram(binwidth = 0.05, color="black",fill="white") +
        theme(plot.title = element_text(hjust = 0.5)) +
        labs(title=title, y="Frequency", x="Library Size (Log10)")

    return(p1)
}

## This is calculated by the first call to contaminationPlot
## and then re-used by the second call.
ylim.max = NULL

contaminationPlot <- function(title, columndata, barcode.data, plate.data, RAW)
{
    coldata = data.frame(colsums=columndata)
    maxval = max(coldata)
    # Set once and once only
    if (is.null(ylim.max)){
        ylim.max <<- maxval + 500
    }

    drawPlates <- function(plate.data){ # 0 384 768

        plate.boundaries <- plate.data["filtered.plates",]
        if (RAW){
            plate.boundaries <- plate.data["unfilter.plates",]
        }

        plate.minyval <- -200
        plate.color <- "grey"
        plate.text.color <- "red"
        plate.text.alpha <- 0.5
        plate.height <- 2* maxval / 5
        plate.spacing <- 12
        plate.height.text <- plate.height - 3000
        plate.text.size <- 3


        zzz <- lapply(names(plate.boundaries), function(plate.name){
            plate.num = as.integer(sub("P","",plate.name))
            plate.xval = plate.boundaries[[plate.name]]

            ## If not first plate -- inner boundary, print next plate name to the right
            if (plate.name != names(plate.boundaries)[length(plate.boundaries)]){
                gplot <<- gplot + annotate("text", x=plate.xval + plate.spacing, label=paste("Plate", plate.num + 1),
                                           size=plate.text.size, y=plate.height*0.7, angle=-90,
                                           color=plate.text.color, alpha=plate.text.alpha)
            }
            ## If not last plate -- inner boundary, print previous name to the left
            if (plate.name != names(plate.boundaries)[1]){
                gplot <<- gplot + annotate("text", x=plate.xval - plate.spacing, label=paste("Plate", plate.num),
                                           size=plate.text.size, y=plate.height*0.7, angle=90,
                                           color=plate.text.color, alpha=plate.text.alpha)
            }
            gplot <<- gplot + geom_segment(aes(x=plate.xval, xend=plate.xval, y=plate.minyval, yend=plate.height), color=plate.color, lty=1, size=1)
        })

        ## Connecting bar underneath
        gplot <<- gplot + geom_segment(aes(x=min(unname(unlist(plate.boundaries))), xend=max(unname(unlist(plate.boundaries))),
                                           y=plate.minyval, yend=plate.minyval), color=plate.color, lty=1, size=1)
    }


    drawBatches <- function(barcode.data, plate.data){
        batch.minyval = -200
        divide.color <- "grey"
        divide.style <- 2 ## dashed
        divide.size <- 0.3
        boundary.color <- "blue"
        boundary.style <- 1
        boundary.size <- 0.5
        divide.text.true.color <- "blue"
        divide.text.false.color <- "black"
        divide.text.name.color <- "grey"
        batch.text.height <- maxval * 4/5
        batch.text.tf.height <- maxval * 2/5
        batch.text.tf.spacing <- 12
        batch.text.alpha <- 0.8
        batch.text.size <- 3
        batch.label.text.size <- 6
        batch.height <- maxval * 4/5
        divide.height <- maxval * 4/5
        batch.spacing <- 24

        boundary.pos <- barcode.data["filtered_positions",]
        plate.pos <- unname(unlist(plate.data["filtered.plates",])) ## just want values

        if (RAW){
            boundary.pos <- barcode.data["unfilter_positions",]
            divider.pos <- barcode.data["filter_in_unfilter",]
            plate.pos <- unname(unlist(plate.data["unfilter.plates",])) ## just want values
        }


        zzz <- lapply(names(boundary.pos), function(batch.name){
            batch.num = as.integer(sub("B","",batch.name))
            batch.xval = unname(unlist(boundary.pos[[batch.name]]))

            # Plot boundary line only if not intersecting with a plate line
            if (!(batch.xval %in% plate.pos)){
                gplot <<- gplot + geom_segment(aes(x=batch.xval, xend=batch.xval, y=batch.minyval, yend=batch.height),
                                               color=boundary.color, lty=boundary.style, size=boundary.size)
            }

            ## Plot labels (except P0)
            if (batch.name != "B0"){
                if (RAW){
                    divide.xval = unname(unlist(divider.pos[[batch.name]]))

                    ## Plot the divider line
                    gplot <<- gplot + geom_segment(aes(x=divide.xval, xend=divide.xval, y=batch.minyval, yend=divide.height),
                                               color=divide.color, lty=divide.style, size=divide.size)

                    ## Plot the True/False labels
                    gplot <<- gplot + annotate("text", x=divide.xval - batch.text.tf.spacing, label="True positives",
                                               size=batch.text.size, y=batch.text.tf.height, angle=+90,
                                               color=divide.text.true.color, alpha=batch.text.alpha)
                    gplot <<- gplot + annotate("text", x=divide.xval + batch.text.tf.spacing, label="False positives",
                                               size=batch.text.size, y=batch.text.tf.height, angle=-90,
                                               color=divide.text.false.color, alpha=batch.text.alpha)
                    ## Plot the Batch names
                    gplot <<- gplot + annotate("text", x=divide.xval, label=batch.name,
                                               size=batch.label.text.size, y=batch.text.height, angle=0,
                                               color = divide.text.name.color, alpha=batch.text.alpha)
                } else {
                    ## Plot the Batch names between blue lines
                    gplot <<- gplot + annotate("text", x=batch.xval, label=batch.name,
                                               size=batch.label.text.size, y=batch.text.height, angle=0,
                                               color = divide.text.name.color, alpha=batch.text.alpha)
                }
            }
        })
    }


    plotCells <- function(coldata){
        gplot <<- gplot + geom_point(data=coldata, aes(x=1:nrow(coldata), y=coldata$colsums), pch = 16, cex = 1)
    }

    plotTheme <- function(){
        gplot <<- gplot + theme(plot.title = element_text(hjust = 0.5),
                                axis.ticks.x=element_blank(), axis.ticks.y=element_blank(),
                                axis.text.x=element_blank()) +
            labs(title=paste("Contamination Plot\n", title), y="Library Size", x="Barcode Index") +
            coord_cartesian(ylim = c(-200, ylim.max)) +
            scale_x_continuous(breaks=NULL)
    }


    gplot <- ggplot()
    drawPlates(plate.data)
    drawBatches(barcode.data, plate.data)
    plotCells(coldata)
    plotTheme()

    return(gplot)
}