Mercurial > repos > mingchen0919 > aurora_deseq2

--- a/deseq2.Rmd	Thu Mar 08 22:04:17 2018 -0500
+++ b/deseq2.Rmd	Fri Mar 09 00:02:09 2018 -0500
@@ -14,10 +14,13 @@
 ## DESeq2 analysis

 ```{r}
-coldata = read.csv(opt$X_B, row.names = 1, header = TRUE)
+# load count data
+load(opt$X_A)
+# load column data
+coldata = read.csv(opt$X_B, row.names = 1, header = TRUE)[colnames(count_data), ]
 dds = DESeqDataSetFromMatrix(countData = count_data,
                              colData = coldata,
-                             design = expression(opt$X_C))
+                             design = formula(opt$X_C))
 dds = DESeq(dds, test = opt$X_G, fitType = opt$X_H)
 ```

@@ -26,6 +29,58 @@

 ```{r}
 res = results(dds, contrast = c(opt$X_D, opt$X_E, opt$X_F), alpha = opt$X_I)
-res
+DT::datatable(as.data.frame(res))
+```
+
+```{r}
+# significant genes
+sig_res = res[(res$padj < opt$X_I) & !is.na(res$padj), ]
+write.csv(sig_res, file = paste0(opt$X_d, '/significant_genes.csv'), quote = FALSE)
+```
+
+```{bash echo=FALSE}
+cp ${X_d}/significant_genes.csv ${X_J}
 ```

+## MA-plot
+
+```{r warning=FALSE, message=FALSE}
+df = data.frame(ID = rownames(res),
+                mean = res$baseMean,
+                lfc = res$log2FoldChange,
+                padj = res$padj,
+                col = ifelse(res$padj < opt$X_I,
+                             paste0("padj < ", opt$X_I),
+                             paste0("padj >= ", opt$X_I)),
+                stringsAsFactors = FALSE)
+p = ggplot(data = df) +
+  geom_point(mapping = aes(x = log(mean), y = lfc, col = col, key = ID)) +
+  scale_x_continuous(name = 'Log(mean)') +
+  scale_y_continuous(name = 'Log fold change') +
+  scale_color_manual(name = 'Significance')+
+  theme_classic()
+ggplotly(p)
+```
+
+
+## Heatmap of count matrix
+
+```{r}
+ntd <- normTransform(dds)
+select <- order(rowMeans(counts(dds,normalized=TRUE)),
+                decreasing=TRUE)[1:20]
+df <- as.data.frame(colData(dds)[, -ncol(colData(dds))])
+pheatmap(assay(ntd)[select,], annotation_col=df)
+```
+
+
+## Principle component plot
+
+```{r}
+vsd <- vst(dds, blind=FALSE)
+p = plotPCA(vsd, intgroup=c(opt$X_D)) +
+  scale_color_discrete(name = 'Group') +
+  theme_classic()
+ggplotly(p)
+```
+
--- a/deseq2.xml	Thu Mar 08 22:04:17 2018 -0500
+++ b/deseq2.xml	Fri Mar 09 00:02:09 2018 -0500
@@ -8,6 +8,7 @@
         <requirement type="package" version="4.5.6">r-plotly</requirement>
         <requirement type="package" version="2.2.1">r-ggplot2</requirement>
         <requirement type="package" version="1.0.8">r-pheatmap</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
     </requirements>
     <stdio>
         <regex match="XXX" source="stderr" level="warning"
@@ -32,7 +33,7 @@
   -I '$alpha'
   -J '$significant_genes']]></command>
     <inputs>
-        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="true"
                label="Display analysis code in report?"/>
         <param type="data" name="count_data" label="Count data"
                help="an RData file that stores the count matrix data. The file is generated from the aurora_htseq tool."
@@ -74,7 +75,7 @@
                optional="False" value="0.1" min="0" max="1"/>
     </inputs>
     <outputs>
-        <data format="html" name="report" label="tool report"/>
+        <data format="html" name="report" label="Aurora DESeq2"/>
         <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
         <data name="significant_genes" format="csv" label="signficant genes from ${on_string} " hidden="false"/>
     </outputs>
--- a/deseq2_render.R	Thu Mar 08 22:04:17 2018 -0500
+++ b/deseq2_render.R	Fri Mar 09 00:02:09 2018 -0500
@@ -13,6 +13,9 @@
 library(ggplot2)
 library(plotly)
 library(htmltools)
+library(DESeq2)
+library(pheatmap)
+library(DT)
 #------------------------------------------------


@@ -25,7 +28,7 @@
 spec_matrix = as.matrix(
   data.frame(stringsAsFactors=FALSE,
               long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_A", "X_B",
-                             "X_C", "X_D", "X_E", "X_F", "X_G", "X_H", "X_I"),
+                             "X_C", "X_D", "X_E", "X_F", "X_G", "X_H", "X_I", "X_J"),
              short_flags = c("e", "o", "d", "s", "t", "A", "B", "C", "D", "E",
                              "F", "G", "H", "I", "J"),
      argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,