diff deseq2.R @ 41:0a0a3388e3f2 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit cbeb1c4c436be04323bd9a809a6393d00b168d07"
author iuc
date Mon, 29 Nov 2021 18:16:10 +0000
parents b30a443484ab
children 6ef2cba4e35a
line wrap: on
line diff
--- a/deseq2.R	Fri Nov 19 21:03:18 2021 +0000
+++ b/deseq2.R	Mon Nov 29 18:16:10 2021 +0000
@@ -52,6 +52,7 @@
   "batch_factors", "w", 1, "character",
   "outfile", "o", 1, "character",
   "countsfile", "n", 1, "character",
+  "sizefactorsfile", "F", 1, "character",
   "rlogfile", "r", 1, "character",
   "vstfile", "v", 1, "character",
   "header", "H", 0, "logical",
@@ -217,6 +218,30 @@
 if (!is.null(opt$esf)) {
     dds <- estimateSizeFactors(dds, type = opt$esf)
 }
+
+# estimate size factors for each sample
+# - https://support.bioconductor.org/p/97676/
+if (!is.null(opt$sizefactorsfile)) {
+    nm <- assays(dds)[["avgTxLength"]]
+    if (!is.null(nm)) {
+        ## Recommended: takes into account tximport data
+        cat("\nsize factors for samples: taking tximport data into account\n")
+        size_factors <- estimateSizeFactorsForMatrix(counts(dds) / nm)
+    } else {
+        norm_factors <- normalizationFactors(dds)
+        if (!is.null(norm_factors)) {
+            ## In practice, gives same results as above.
+            cat("\nsize factors for samples: no tximport data, using derived normalization factors\n")
+            size_factors <- estimateSizeFactorsForMatrix(norm_factors)
+        } else {
+            ## If we have no other information, estimate from raw.
+            cat("\nsize factors for samples: no tximport data, no normalization factors, estimating from raw data\n")
+            size_factors <- estimateSizeFactorsForMatrix(counts(dds))
+        }
+    }
+    write.table(size_factors, file = opt$sizefactorsfile, sep = "\t", col.names = F, quote = FALSE)
+}
+
 apply_batch_factors <- function(dds, batch_factors) {
   rownames(batch_factors) <- batch_factors$identifier
   batch_factors <- subset(batch_factors, select = -c(identifier, condition))