Mercurial > repos > iuc > deseq2
comparison deseq2.R @ 26:47267a044ef1 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/deseq2 commit e811a7887db870f4f94f620f52bce656c8d5ba23
| author | iuc |
|---|---|
| date | Thu, 12 Apr 2018 17:29:07 -0400 |
| parents | 1a34aec44d60 |
| children | dc6bc19f05ab |
comparison
equal
deleted
inserted
replaced
| 25:a1aee7e3d092 | 26:47267a044ef1 |
|---|---|
| 1 #!/usr/bin/env Rscript | 1 #!/usr/bin/env Rscript |
| 2 | 2 |
| 3 # A command-line interface to DESeq2 for use with Galaxy | 3 # A command-line interface to DESeq2 for use with Galaxy |
| 4 # written by Bjoern Gruening and modified by Michael Love 2016.03.30 | 4 # written by Bjoern Gruening and modified by Michael Love 2016.03.30 |
| 5 # | 5 # |
| 6 # one of these arguments is required: | 6 # This argument is required: |
| 7 # | 7 # |
| 8 # 'factors' a JSON list object from Galaxy | 8 # 'factors' a JSON list object from Galaxy |
| 9 # | |
| 10 # 'sample_table' is a sample table as described in ?DESeqDataSetFromHTSeqCount | |
| 11 # with columns: sample name, filename, then factors (variables) | |
| 12 # | 9 # |
| 13 # the output file has columns: | 10 # the output file has columns: |
| 14 # | 11 # |
| 15 # baseMean (mean normalized count) | 12 # baseMean (mean normalized count) |
| 16 # log2FoldChange (by default a moderated LFC estimate) | 13 # log2FoldChange (by default a moderated LFC estimate) |
| 17 # lfcSE (the standard error) | 14 # lfcSE (the standard error) |
| 18 # stat (the Wald statistic) | 15 # stat (the Wald statistic) |
| 19 # pvalue (p-value from comparison of Wald statistic to a standard Normal) | 16 # pvalue (p-value from comparison of Wald statistic to a standard Normal) |
| 20 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter) | 17 # padj (adjusted p-value, Benjamini Hochberg correction on genes which pass the mean count filter) |
| 21 # | 18 # |
| 22 # the first variable in 'factors' and first column in 'sample_table' will be the primary factor. | 19 # the first variable in 'factors' will be the primary factor. |
| 23 # the levels of the primary factor are used in the order of appearance in factors or in sample_table. | 20 # the levels of the primary factor are used in the order of appearance in factors. |
| 24 # | 21 # |
| 25 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile' | 22 # by default, levels in the order A,B,C produces a single comparison of B vs A, to a single file 'outfile' |
| 26 # | 23 # |
| 27 # for the 'many_contrasts' flag, levels in the order A,B,C produces comparisons C vs A, B vs A, C vs B, | 24 # for the 'many_contrasts' flag, levels in the order A,B,C produces comparisons C vs A, B vs A, C vs B, |
| 28 # to a number of files using the 'outfile' prefix: 'outfile.condition_C_vs_A' etc. | 25 # to a number of files using the 'outfile' prefix: 'outfile.condition_C_vs_A' etc. |
| 52 "outfile", "o", 1, "character", | 49 "outfile", "o", 1, "character", |
| 53 "countsfile", "n", 1, "character", | 50 "countsfile", "n", 1, "character", |
| 54 "factors", "f", 1, "character", | 51 "factors", "f", 1, "character", |
| 55 "files_to_labels", "l", 1, "character", | 52 "files_to_labels", "l", 1, "character", |
| 56 "plots" , "p", 1, "character", | 53 "plots" , "p", 1, "character", |
| 57 "sample_table", "s", 1, "character", | |
| 58 "tximport", "i", 0, "logical", | 54 "tximport", "i", 0, "logical", |
| 59 "txtype", "y", 1, "character", | 55 "txtype", "y", 1, "character", |
| 60 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF) | 56 "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF) |
| 61 "fit_type", "t", 1, "integer", | 57 "fit_type", "t", 1, "integer", |
| 62 "many_contrasts", "m", 0, "logical", | 58 "many_contrasts", "m", 0, "logical", |
| 77 # enforce the following required arguments | 73 # enforce the following required arguments |
| 78 if (is.null(opt$outfile)) { | 74 if (is.null(opt$outfile)) { |
| 79 cat("'outfile' is required\n") | 75 cat("'outfile' is required\n") |
| 80 q(status=1) | 76 q(status=1) |
| 81 } | 77 } |
| 82 if (is.null(opt$sample_table) & is.null(opt$factors)) { | 78 if (is.null(opt$factors)) { |
| 83 cat("'factors' or 'sample_table' is required\n") | 79 cat("'factors' is required\n") |
| 84 q(status=1) | 80 q(status=1) |
| 85 } | 81 } |
| 86 | 82 |
| 87 verbose <- if (is.null(opt$quiet)) { | 83 verbose <- if (is.null(opt$quiet)) { |
| 88 TRUE | 84 TRUE |
| 112 # build or read sample table | 108 # build or read sample table |
| 113 | 109 |
| 114 trim <- function (x) gsub("^\\s+|\\s+$", "", x) | 110 trim <- function (x) gsub("^\\s+|\\s+$", "", x) |
| 115 | 111 |
| 116 # switch on if 'factors' was provided: | 112 # switch on if 'factors' was provided: |
| 117 if (!is.null(opt$factors)) { | 113 library("rjson") |
| 118 library("rjson") | 114 parser <- newJSONParser() |
| 119 parser <- newJSONParser() | 115 parser$addData(opt$factors) |
| 120 parser$addData(opt$factors) | 116 factorList <- parser$getObject() |
| 121 factorList <- parser$getObject() | 117 filenames_to_labels <- fromJSON(opt$files_to_labels) |
| 122 filenames_to_labels <- fromJSON(opt$files_to_labels) | 118 factors <- sapply(factorList, function(x) x[[1]]) |
| 123 factors <- sapply(factorList, function(x) x[[1]]) | 119 primaryFactor <- factors[1] |
| 124 primaryFactor <- factors[1] | 120 filenamesIn <- unname(unlist(factorList[[1]][[2]])) |
| 125 filenamesIn <- unname(unlist(factorList[[1]][[2]])) | 121 labs = unname(unlist(filenames_to_labels[basename(filenamesIn)])) |
| 126 labs = unname(unlist(filenames_to_labels[basename(filenamesIn)])) | 122 sampleTable <- data.frame(sample=basename(filenamesIn), |
| 127 sampleTable <- data.frame(sample=basename(filenamesIn), | 123 filename=filenamesIn, |
| 128 filename=filenamesIn, | 124 row.names=filenamesIn, |
| 129 row.names=filenamesIn, | 125 stringsAsFactors=FALSE) |
| 130 stringsAsFactors=FALSE) | 126 for (factor in factorList) { |
| 131 for (factor in factorList) { | 127 factorName <- trim(factor[[1]]) |
| 132 factorName <- trim(factor[[1]]) | 128 sampleTable[[factorName]] <- character(nrow(sampleTable)) |
| 133 sampleTable[[factorName]] <- character(nrow(sampleTable)) | 129 lvls <- sapply(factor[[2]], function(x) names(x)) |
| 134 lvls <- sapply(factor[[2]], function(x) names(x)) | 130 for (i in seq_along(factor[[2]])) { |
| 135 for (i in seq_along(factor[[2]])) { | 131 files <- factor[[2]][[i]][[1]] |
| 136 files <- factor[[2]][[i]][[1]] | 132 sampleTable[files,factorName] <- trim(lvls[i]) |
| 137 sampleTable[files,factorName] <- trim(lvls[i]) | 133 } |
| 138 } | 134 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) |
| 139 sampleTable[[factorName]] <- factor(sampleTable[[factorName]], levels=lvls) | 135 } |
| 140 } | 136 rownames(sampleTable) <- labs |
| 141 rownames(sampleTable) <- labs | |
| 142 } else { | |
| 143 # read the sample_table argument | |
| 144 # this table is described in ?DESeqDataSet | |
| 145 # one column for the sample name, one for the filename, and | |
| 146 # the remaining columns for factors in the analysis | |
| 147 sampleTable <- read.delim(opt$sample_table, stringsAsFactors=FALSE) | |
| 148 factors <- colnames(sampleTable)[-c(1:2)] | |
| 149 for (factor in factors) { | |
| 150 lvls <- unique(as.character(sampleTable[[factor]])) | |
| 151 sampleTable[[factor]] <- factor(sampleTable[[factor]], levels=lvls) | |
| 152 } | |
| 153 } | |
| 154 | 137 |
| 155 primaryFactor <- factors[1] | 138 primaryFactor <- factors[1] |
| 156 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) | 139 designFormula <- as.formula(paste("~", paste(rev(factors), collapse=" + "))) |
| 157 | 140 |
| 158 if (verbose) { | 141 if (verbose) { |
| 214 cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n")) | 197 cat(paste("other factors in design:",paste(factors[-length(factors)],collapse=","),"\n")) |
| 215 } | 198 } |
| 216 cat("\n---------------------\n") | 199 cat("\n---------------------\n") |
| 217 } | 200 } |
| 218 | 201 |
| 219 # if JSON input from Galaxy, path is absolute | 202 # For JSON input from Galaxy, path is absolute |
| 220 # otherwise, from sample_table, assume it is relative | 203 dir <- "" |
| 221 dir <- if (is.null(opt$factors)) { | |
| 222 "." | |
| 223 } else { | |
| 224 "" | |
| 225 } | |
| 226 | 204 |
| 227 if (!useTXI) { | 205 if (!useTXI) { |
| 228 # construct the object from HTSeq files | 206 # construct the object from HTSeq files |
| 229 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, | 207 dds <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, |
| 230 directory = dir, | 208 directory = dir, |
