Mercurial > repos > iuc > deseq2
comparison get_deseq_dataset.R @ 44:1cb33de18af5 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5bf5011af827e93ddeecbfba4815fe9b85f02594
| author | iuc |
|---|---|
| date | Tue, 09 Dec 2025 17:43:58 +0000 |
| parents | 6ef2cba4e35a |
| children |
comparison
equal
deleted
inserted
replaced
| 43:621ddf5967d2 | 44:1cb33de18af5 |
|---|---|
| 1 get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) { | 1 get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) { |
| 2 dir <- "" | |
| 2 | 3 |
| 3 dir <- "" | 4 has_header <- !is.null(header) |
| 5 use_txi <- !is.null(tximport) | |
| 6 if (use_txi) { | |
| 7 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") | |
| 8 if (tolower(file_ext(tx2gene)) == "gff") { | |
| 9 gff_file <- tx2gene | |
| 10 } else { | |
| 11 gff_file <- NULL | |
| 12 tx2gene <- read.table(tx2gene, header = has_header) | |
| 13 } | |
| 14 } | |
| 4 | 15 |
| 5 has_header <- !is.null(header) | 16 if (!use_txi && has_header) { |
| 6 use_txi <- !is.null(tximport) | 17 countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1) |
| 7 if (use_txi) { | 18 tbl <- do.call("cbind", countfiles) |
| 8 if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport") | 19 colnames(tbl) <- rownames(sample_table) # take sample ids from header |
| 9 if (tolower(file_ext(tx2gene)) == "gff") { | 20 |
| 10 gff_file <- tx2gene | 21 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) |
| 22 old_special_names <- c( | |
| 23 "no_feature", | |
| 24 "ambiguous", | |
| 25 "too_low_aQual", | |
| 26 "not_aligned", | |
| 27 "alignment_not_unique" | |
| 28 ) | |
| 29 special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names | |
| 30 tbl <- tbl[!special_rows, , drop = FALSE] | |
| 31 | |
| 32 dds <- DESeqDataSetFromMatrix( | |
| 33 countData = tbl, | |
| 34 colData = subset(sample_table, select = -filename), | |
| 35 design = design_formula | |
| 36 ) | |
| 37 } else if (!use_txi && !has_header) { | |
| 38 # construct the object from HTSeq files | |
| 39 dds <- DESeqDataSetFromHTSeqCount( | |
| 40 sampleTable = sample_table, | |
| 41 directory = dir, | |
| 42 design = design_formula | |
| 43 ) | |
| 44 colnames(dds) <- row.names(sample_table) | |
| 11 } else { | 45 } else { |
| 12 gff_file <- NULL | 46 # construct the object using tximport |
| 13 tx2gene <- read.table(tx2gene, header = has_header) | 47 library("tximport") |
| 48 txi_files <- as.character(sample_table$filename) | |
| 49 labs <- row.names(sample_table) | |
| 50 names(txi_files) <- labs | |
| 51 if (!is.null(gff_file)) { | |
| 52 # first need to make the tx2gene table | |
| 53 # this takes ~2-3 minutes using Bioconductor functions | |
| 54 suppressPackageStartupMessages({ | |
| 55 library("GenomicFeatures") | |
| 56 }) | |
| 57 txdb <- makeTxDbFromGFF(gff_file) | |
| 58 k <- keys(txdb, keytype = "TXNAME") | |
| 59 tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME") | |
| 60 # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name) | |
| 61 tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint | |
| 62 } | |
| 63 try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene)) | |
| 64 if (!exists("txi")) { | |
| 65 # Remove version from transcript IDs in tx2gene... | |
| 66 tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME) # nolint | |
| 67 # ...and in txi_files | |
| 68 txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE) | |
| 69 } | |
| 70 dds <- DESeqDataSetFromTximport( | |
| 71 txi, | |
| 72 subset(sample_table, select = -c(filename)), | |
| 73 design_formula | |
| 74 ) | |
| 14 } | 75 } |
| 15 } | 76 return(dds) |
| 16 | |
| 17 if (!use_txi && has_header) { | |
| 18 countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1) | |
| 19 tbl <- do.call("cbind", countfiles) | |
| 20 colnames(tbl) <- rownames(sample_table) # take sample ids from header | |
| 21 | |
| 22 # check for htseq report lines (from DESeqDataSetFromHTSeqCount function) | |
| 23 old_special_names <- c( | |
| 24 "no_feature", | |
| 25 "ambiguous", | |
| 26 "too_low_aQual", | |
| 27 "not_aligned", | |
| 28 "alignment_not_unique" | |
| 29 ) | |
| 30 special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names | |
| 31 tbl <- tbl[!special_rows, , drop = FALSE] | |
| 32 | |
| 33 dds <- DESeqDataSetFromMatrix( | |
| 34 countData = tbl, | |
| 35 colData = subset(sample_table, select = -filename), | |
| 36 design = design_formula | |
| 37 ) | |
| 38 } else if (!use_txi && !has_header) { | |
| 39 | |
| 40 # construct the object from HTSeq files | |
| 41 dds <- DESeqDataSetFromHTSeqCount( | |
| 42 sampleTable = sample_table, | |
| 43 directory = dir, | |
| 44 design = design_formula | |
| 45 ) | |
| 46 colnames(dds) <- row.names(sample_table) | |
| 47 | |
| 48 } else { | |
| 49 # construct the object using tximport | |
| 50 library("tximport") | |
| 51 txi_files <- as.character(sample_table$filename) | |
| 52 labs <- row.names(sample_table) | |
| 53 names(txi_files) <- labs | |
| 54 if (!is.null(gff_file)) { | |
| 55 # first need to make the tx2gene table | |
| 56 # this takes ~2-3 minutes using Bioconductor functions | |
| 57 suppressPackageStartupMessages({ | |
| 58 library("GenomicFeatures") | |
| 59 }) | |
| 60 txdb <- makeTxDbFromGFF(gff_file) | |
| 61 k <- keys(txdb, keytype = "TXNAME") | |
| 62 tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME") | |
| 63 # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name) | |
| 64 tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint | |
| 65 } | |
| 66 try(txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene)) | |
| 67 if (!exists("txi")) { | |
| 68 # Remove version from transcript IDs in tx2gene... | |
| 69 tx2gene$TXNAME <- sub("\\.[0-9]+$", "", tx2gene$TXNAME) # nolint | |
| 70 # ...and in txi_files | |
| 71 txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE) | |
| 72 } | |
| 73 dds <- DESeqDataSetFromTximport( | |
| 74 txi, | |
| 75 subset(sample_table, select = -c(filename)), | |
| 76 design_formula | |
| 77 ) | |
| 78 } | |
| 79 return(dds) | |
| 80 } | 77 } |
