Mercurial > repos > iuc > crosscontamination_barcode_filter
view scripts/batch_plotting_functions.R @ 2:e20001675838 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crosscontamination_barcode_filter commit 82a0fd493f5866b3ef65019709ae5c865998f802
author | iuc |
---|---|
date | Wed, 12 Jun 2019 04:57:52 -0400 |
parents | 78341ccbad0a |
children |
line wrap: on
line source
#!/usr/bin/env R suppressPackageStartupMessages(require(data.table)) ## ## Batch Plotting Functions ## sortedBatchesOrPlates <- function(batch.list){ #' B0, B1, B11, B12, B2, B3, ... #' to B0,B1,B2,---B11,B12 vals.and.index = sort(as.integer(sub("^[BP]", "", batch.list)), index.return=TRUE) return(batch.list[vals.and.index$ix]) } calculateBarcodePositions <- function(barcode.form, full.barcode.size){ #' Determine x-axis positions of all batches under the context of #' unfiltered barcodes (full set), filtered (real set), dividing line #' (the position of real set in the full set). #' #' @param barcode.form list of barcode formats and the batches they map to #' @param full.barcode.size size of all barcodes #' @return dataframe of batch information: sizes, unfiltered, filtered, #' and dividing line. sizes <- list(B0=0) res <- sapply(names(barcode.form), function(key){ rng <- as.integer(unlist(strsplit(key, '-'))) size.of.range <- length(seq(rng[1],rng[2])) sub.batches <- barcode.form[[key]] # 1,3,5,7 or 2,4,6,8 res2 <- lapply(sub.batches, function(bat){ sizes[[paste("B",bat, sep="")]] <<- size.of.range }) }) ## We now have sizes per batch, in order of batch ## Need to place these at positions after each full barcode size ## Below we have "positions" which has the END positions of each batch under ## the assumption of using full barcodes. The "real_positions" contains the ## END positions of each batch under the assumption of using only the real ## subsetted barcodes. unfilter_positions <- list(B0=0) filtered_positions <- list(B0=0) filter_in_unfilter <- list(B0=0) ## dividing line between real and false barcodes in each batch res <- sapply(sortedBatchesOrPlates(names(sizes)), function(batch.name){ batch.num <- as.integer(sub("B","", batch.name)) if (batch.num > 0){ batch.size <- sizes[[batch.name]] ## 96 batch.name.previous = paste("B", batch.num-1, sep="") batch.start <- unfilter_positions[[batch.name.previous]] filt.batch.start <- filtered_positions[[batch.name.previous]] unfilter_positions[[batch.name]] <<- batch.start + full.barcode.size filtered_positions[[batch.name]] <<- filt.batch.start + batch.size filter_in_unfilter[[batch.name]] <<- batch.start + batch.size } }) # Put into a dataframe, merging lists on their common names dd <- data.frame(rbindlist(list( unfilter_positions=unfilter_positions, filter_in_unfilter=filter_in_unfilter, filtered_positions=filtered_positions, sizes=sizes), ## sizes go last to not mess up the column name ordering use.names = TRUE, idcol = TRUE)) rownames(dd) <- dd$.id dd <- dd[,!(colnames(dd) %in% ".id")] return(dd) } calculatePlatePositions <- function(plate.form, full.barcode.size, all.batch.data){ #' Determine the x-axis plate positions for each of the unfiltered and filtered sets #' #' Given the true size of each batch, and which batches exist in which plates #' calculate the size of each plate #' #' @param plate.form list of vectors mapping plates to batches #' @param full.barcode.size size of the full set of barcodes #' @param all.batch.data the output of 'calculateBarcodePositions' #' @return dataframe of plate information pertaining to positions and sizes of plates unfilter.plates = list(P0=0) filtered.plates = list(P0=0) unfilter.plates.sizes = list(P0=0) filtered.plates.sizes = list(P0=0) res <- sapply(sortedBatchesOrPlates(names(plate.form)), function(plate.num){ unfilter.plate.size = 0 filtered.plate.size = 0 batches <- plate.form[[plate.num]] res2 <- sapply(sort(batches), function(batch.num){ batch.size <- all.batch.data["sizes",paste("B", batch.num, sep="")] unfilter.plate.size <<- unfilter.plate.size + full.barcode.size filtered.plate.size <<- filtered.plate.size + batch.size }) plate.name = paste("P", plate.num, sep="") plate.name.previous = paste("P", as.integer(plate.num) - 1, sep="") unfilter.plates.sizes[[plate.name]] <<- unfilter.plate.size filtered.plates.sizes[[plate.name]] <<- filtered.plate.size filtered.plates[[plate.name]] <<- filtered.plates[[plate.name.previous]] + filtered.plate.size unfilter.plates[[plate.name]] <<- unfilter.plates[[plate.name.previous]] + unfilter.plate.size }) # Put into a dataframe, merging lists on their common names dd <- data.frame(rbindlist(list( unfilter.plates=unfilter.plates, unfilter.plates.sizes=unfilter.plates.sizes, filtered.plates=filtered.plates, filtered.plates.sizes=filtered.plates.sizes), use.names = TRUE, idcol = TRUE)) rownames(dd) <- dd$.id dd <- dd[,!(colnames(dd) %in% ".id")] return(dd) }