annotate get_deseq_dataset.R @ 44:1cb33de18af5 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 5bf5011af827e93ddeecbfba4815fe9b85f02594
author iuc
date Tue, 09 Dec 2025 17:43:58 +0000
parents 6ef2cba4e35a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
37
951876579237 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit 9eb6d07510ccf27d6499172d62c81661078ec57b"
iuc
parents: 35
diff changeset
# Build a DESeqDataSet from per-sample count files or from tximport
# quantification files.
#
# Arguments:
#   sample_table    data frame with one row per sample. Its row names are used
#                   as sample ids and its `filename` column holds the input
#                   file paths.
#   header          NULL when the input tables have no header line; any
#                   non-NULL value means they do.
#   design_formula  design handed unchanged to the DESeq2 constructor.
#   tximport        NULL to read plain count files; any non-NULL value selects
#                   the tximport route.
#   txtype          forwarded to tximport() as `type`.
#   tx2gene         path to a transcript-to-gene table (transcript id in the
#                   first column), or to an annotation file whose extension is
#                   "gff" (case-insensitive). Required on the tximport route.
#
# Returns the object produced by DESeqDataSetFromMatrix,
# DESeqDataSetFromHTSeqCount or DESeqDataSetFromTximport, depending on the
# route taken.
#
# Stops with an error when the tximport route is requested without `tx2gene`,
# or when count files with a header do not list the same gene ids in the same
# order.
get_deseq_dataset <- function(sample_table, header, design_formula, tximport, txtype, tx2gene) {
    dir <- ""

    has_header <- !is.null(header)
    use_txi <- !is.null(tximport)
    if (use_txi) {
        if (is.null(tx2gene)) stop("A transcript-to-gene map or a GTF/GFF3 file is required for tximport")
        # tools:: is spelled out so this does not depend on the caller having
        # attached the tools package.
        if (tolower(tools::file_ext(tx2gene)) == "gff") {
            gff_file <- tx2gene
        } else {
            gff_file <- NULL
            tx2gene <- read.table(tx2gene, header = has_header)
        }
    }

    if (!use_txi && has_header) {
        countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1)

        # cbind() pairs rows by position only, so refuse inputs whose gene ids
        # differ instead of silently combining counts of different genes
        # (same check as in the DESeqDataSetFromHTSeqCount function).
        if (length(countfiles) > 1) {
            gene_ids <- lapply(countfiles, rownames)
            same_ids <- vapply(gene_ids, identical, logical(1), gene_ids[[1]])
            if (!all(same_ids)) {
                stop("Gene IDs (first column) differ between files.")
            }
        }

        tbl <- do.call("cbind", countfiles)
        colnames(tbl) <- rownames(sample_table) # take sample ids from header

        # check for htseq report lines (from DESeqDataSetFromHTSeqCount function)
        old_special_names <- c(
            "no_feature",
            "ambiguous",
            "too_low_aQual",
            "not_aligned",
            "alignment_not_unique"
        )
        special_rows <- (substr(rownames(tbl), 1, 1) == "_") | rownames(tbl) %in% old_special_names
        tbl <- tbl[!special_rows, , drop = FALSE]

        dds <- DESeqDataSetFromMatrix(
            countData = tbl,
            colData = subset(sample_table, select = -filename),
            design = design_formula
        )
    } else if (!use_txi && !has_header) {
        # construct the object from HTSeq files
        dds <- DESeqDataSetFromHTSeqCount(
            sampleTable = sample_table,
            directory = dir,
            design = design_formula
        )
        colnames(dds) <- row.names(sample_table)
    } else {
        # construct the object using tximport
        library("tximport")
        txi_files <- as.character(sample_table$filename)
        labs <- row.names(sample_table)
        names(txi_files) <- labs
        if (!is.null(gff_file)) {
            # first need to make the tx2gene table
            # this takes ~2-3 minutes using Bioconductor functions
            suppressPackageStartupMessages({
                library("GenomicFeatures")
            })
            txdb <- makeTxDbFromGFF(gff_file)
            k <- keys(txdb, keytype = "TXNAME")
            tx2gene <- select(txdb, keys = k, columns = "GENEID", keytype = "TXNAME")
            # Remove 'transcript:' from transcript IDs (when gff_file is a GFF3 from Ensembl and the transcript does not have a Name)
            tx2gene$TXNAME <- sub("^transcript:", "", tx2gene$TXNAME) # nolint
        }

        # First attempt: match transcript ids exactly as given. The result is
        # kept in a local sentinel (NULL on failure) rather than probed with
        # exists(), which would also find an unrelated `txi` defined in an
        # enclosing or global environment.
        txi <- tryCatch(
            tximport(txi_files, type = txtype, tx2gene = tx2gene),
            error = function(e) {
                message(
                    "tximport failed (", conditionMessage(e),
                    "); retrying with transcript versions ignored"
                )
                NULL
            }
        )
        if (is.null(txi)) {
            # Remove version from transcript IDs in tx2gene...
            # The transcript id is addressed by position (first column)
            # because a user-supplied table need not have a TXNAME column.
            tx2gene[[1]] <- sub("\\.[0-9]+$", "", tx2gene[[1]])
            # ...and in txi_files
            txi <- tximport(txi_files, type = txtype, tx2gene = tx2gene, ignoreTxVersion = TRUE)
        }
        dds <- DESeqDataSetFromTximport(
            txi,
            subset(sample_table, select = -c(filename)),
            design_formula
        )
    }
    return(dds)
}