Mercurial > repos > iuc > deseq2
diff get_deseq_dataset.R @ 44:1cb33de18af5 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5bf5011af827e93ddeecbfba4815fe9b85f02594
| author | iuc |
|---|---|
| date | Tue, 09 Dec 2025 17:43:58 +0000 |
| parents | 6ef2cba4e35a |
| children |
line wrap: on
line diff
--- a/get_deseq_dataset.R Tue Jul 18 14:58:28 2023 +0000 +++ b/get_deseq_dataset.R Tue Dec 09 17:43:58 2025 +0000 @@ -1,80 +1,77 @@ get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) { - - dir <- "" + dir <- "" - has_header <- !is.null(header) - use_txi <- !is.null(tximport) - if (use_txi) { - if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") - if (tolower(file_ext(tx2gene)) == "gff") { - gff_file <- tx2gene - } else { - gff_file <- NULL - tx2gene <- read.table(tx2gene, header = has_header) + has_header <- !is.null(header) + use_txi <- !is.null(tximport) + if (use_txi) { + if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") + if (tolower(file_ext(tx2gene)) == "gff") { + gff_file <- tx2gene + } else { + gff_file <- NULL + tx2gene <- read.table(tx2gene, header = has_header) + } } - } - if (!use_txi && has_header) { - countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1) - tbl <- do.call("cbind", countfiles) - colnames(tbl) <- rownames(sample_table) # take sample ids from header + if (!use_txi && has_header) { + countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1) + tbl <- do.call("cbind", countfiles) + colnames(tbl) <- rownames(sample_table) # take sample ids from header - # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) - old_special_names <- c( - "no_feature", - "ambiguous", - "too_low_aQual", - "not_aligned", - "alignment_not_unique" - ) - special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names - tbl <- tbl[!special_rows, , drop = FALSE] - - dds <- DESeqDataSetFromMatrix( - countData = tbl, - colData = subset(sample_table, select = -filename), - design = design_formula - ) - } else if (!use_txi && !has_header) { + # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) + old_special_names <- c( + "no_feature", + "ambiguous", + "too_low_aQual", + "not_aligned", + "alignment_not_unique" + ) + special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names + tbl <- tbl[!special_rows, , drop = FALSE] - # construct the object from HTSeq files - dds <- DESeqDataSetFromHTSeqCount( - sampleTable = sample_table, - directory = dir, - design = design_formula - ) - colnames(dds) <- row.names(sample_table) - - } else { - # construct the object using tximport - library("tximport") - txi_files <- as.character(sample_table$filename) - labs <- row.names(sample_table) - names(txi_files) <- labs - if (!is.null(gff_file)) { - # first need to make the tx2gene table - # this takes ~2-3 minutes using Bioconductor functions - suppressPackageStartupMessages({ - library("GenomicFeatures") - }) - txdb <- makeTxDbFromGFF(gff_file) - k <- keys(txdb, keytype = "TXNAME") - tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME") - # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name) - tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint - } - try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene)) - if (!exists("txi")) { - # Remove version from transcript IDs in tx2gene... - tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME) # nolint - # ...and in txi_files - txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE) - } - dds <- DESeqDataSetFromTximport( - txi, - subset(sample_table, select = -c(filename)), - design_formula - ) - } - return(dds) + dds <- DESeqDataSetFromMatrix( + countData = tbl, + colData = subset(sample_table, select = -filename), + design = design_formula + ) + } else if (!use_txi && !has_header) { + # construct the object from HTSeq files + dds <- DESeqDataSetFromHTSeqCount( + sampleTable = sample_table, + directory = dir, + design = design_formula + ) + colnames(dds) <- row.names(sample_table) + } else { + # construct the object using tximport + library("tximport") + txi_files <- as.character(sample_table$filename) + labs <- row.names(sample_table) + names(txi_files) <- labs + if (!is.null(gff_file)) { + # first need to make the tx2gene table + # this takes ~2-3 minutes using Bioconductor functions + suppressPackageStartupMessages({ + library("GenomicFeatures") + }) + txdb <- makeTxDbFromGFF(gff_file) + k <- keys(txdb, keytype = "TXNAME") + tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME") + # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name) + tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint + } + try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene)) + if (!exists("txi")) { + # Remove version from transcript IDs in tx2gene... + tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME) # nolint + # ...and in txi_files + txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE) + } + dds <- DESeqDataSetFromTximport( + txi, + subset(sample_table, select = -c(filename)), + design_formula + ) + } + return(dds) }
