view scripts/reorder_matrix_headers.R @ 2:e20001675838 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/crosscontamination_barcode_filter commit 82a0fd493f5866b3ef65019709ae5c865998f802
author iuc
date Wed, 12 Jun 2019 04:57:52 -0400
parents 78341ccbad0a
children
line wrap: on
line source

#!/usr/bin/env R
##
## Reorder matrix
##
convertHeadersToSensible <- function(regex.from, regex.to, col.names){
    #' Strips headers of filenames and sets plate, batch, and barcodes
    #'
    #' @param regex.from format to extract plate, batch, and barcodes
    #' @param regex.to format to set
    #' @param matrix input matrix to rename headers
    #' @return updated names
    return(sub(regex.from, regex.to, col.names))
}

reorderMatrixHeaders <- function(barcodes, count.matrix, barcode.format, plate.format, sort.cells){
    #' Reorder headers to segment wanted and unwanted barcodes on opposite sides
    #' of each batch
    #'
    #' @param barcodes list of full barcodes
    #' @param count.matrix input matrix
    #' @param barcode.format batch list specifying valid barcodes for each batch
    #' @param plate.format plate list specifying plate format for each batch
    #' @param sort.cells sort cells by sizes
    #' @return list of all barcodes sorted bilaterally by batch, and true barcodes

    fixed.headers <- colnames(count.matrix)

    batch.ordering <- list()
    batch.ordering.filtered <- list()

    res <- sapply(names(barcode.format), function(key){
        rng <- as.integer(unlist(strsplit(key, '-')))
        ranges <- seq(rng[1],rng[2])

        # Barcodes wanted and unwanted for this range of batches
        barc.wanted <- barcodes[ranges]
        barc.unwant <- barcodes[!(barcodes %in% barc.wanted)]

        sub.batches <- barcode.format[[key]]  # 1,3,5,7 or 2,4,6,8

        res2 <- lapply(sub.batches, function(bat){
            batch.match <- paste("_B",bat,"_",sep="")
            headers.in.batch <- fixed.headers[grepl(batch.match, fixed.headers)]
            barcodes.in.batch <- sub(".*_([ATCGN]+)$", "\\1", headers.in.batch)

            headers.in.batch.wanted <- headers.in.batch[barcodes.in.batch %in% barc.wanted]
            headers.in.batch.unwant <- headers.in.batch[barcodes.in.batch %in% barc.unwant]

            if (sum(headers.in.batch.wanted %in% headers.in.batch.unwant) > 0){
                stop("Barcode given twice!", headers.in.batch.wanted[headers.in.batch.wanted %in% headers.in.batch.unwant])
            }

            ## Perform cell sorting if desired
            if (sort.cells){
                wanted.sorted.n <- names(sort(colSums(count.matrix[headers.in.batch.wanted])))
                unwant.sorted.n <- names(sort(colSums(count.matrix[headers.in.batch.unwant])))

                headers.in.batch.wanted <- wanted.sorted.n
                headers.in.batch.unwant <- unwant.sorted.n
            }

            ## False on the left, True on the right
            headers.in.batch.neworder <- c(headers.in.batch.unwant, headers.in.batch.wanted)
            batch.name <- paste("B", bat, sep="")

            batch.ordering[[batch.name]] <<- headers.in.batch.neworder
            batch.ordering.filtered[[batch.name]] <<- headers.in.batch.wanted
        })
    })
    ## Now we have sorted all our barcodes in each batch to the correct order
    ## we just have to sort the batches into the correct order according to plating setup
    barcode.ordering <- c()
    barcode.ordering.filtered <- c()

    res <- sapply(sortedBatchesOrPlates(names(plate.format)), function(plate.num){
        batches <- plate.format[[plate.num]]

        ## Preserve batch order on plates
        res2 <- sapply(batches, function(batch.num){
            batch.name <- paste("B", batch.num, sep="")
            barcs <- batch.ordering[[batch.name]]
            barcs.filtered <- batch.ordering.filtered[[batch.name]]

            barcode.ordering <<- c(barcode.ordering, barcs)
            barcode.ordering.filtered <<- c(barcode.ordering.filtered, barcs.filtered)
        })
    })

    return(list(
        all=barcode.ordering,
        filtered=barcode.ordering.filtered,
        debug.barcodes = batch.ordering
    ))
}