changeset 3:b9c9d1dacda6 draft

reformat code
author mingchen0919
date Fri, 09 Mar 2018 17:47:50 -0500
parents 7abe0e473013
children acb9908e68e4
files 01_evaluation_overview.Rmd 02_per_base_sequence_quality.Rmd 03_per_tile_sequence_quality.Rmd 04_per_sequence_quality_score.Rmd 05_per_base_sequence_content.Rmd 06_per_sequence_gc_content.Rmd 07_per_base_n_content.Rmd 08_sequence_length_distribution.Rmd 09_sequence_duplication_levels.Rmd 10_adapter_content.Rmd 11_kmer_content.Rmd _site.yml fastqc_site.xml
diffstat 13 files changed, 93 insertions(+), 102 deletions(-) [+]
line wrap: on
line diff
--- a/01_evaluation_overview.Rmd	Tue Feb 27 10:39:23 2018 -0500
+++ b/01_evaluation_overview.Rmd	Fri Mar 09 17:47:50 2018 -0500
@@ -85,11 +85,27 @@
 ## Overview
 
 ```{r eval=TRUE}
-read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 2:1]
-read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 1]
-combined_summary = cbind(read_1_summary, read_2_summary)
-names(combined_summary) = c('MODULE', paste0(opt$X_n, '(before)'), paste0(opt$X_N, '(after)'))
+read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'),
+                          stringsAsFactors = FALSE,
+                          header = FALSE, sep = '\t')[, 2:1]
+read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'),
+                          stringsAsFactors = FALSE,
+                          header = FALSE, sep = '\t')[, 1]
+combined_summary = data.frame(read_1_summary, read_2_summary, stringsAsFactors = FALSE)
+names(combined_summary) = c('MODULE', 'Pre-trimming', 'Post-trimming')
 combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)'
 combined_summary[combined_summary == 'WARN'] = 'WARN (!)'
 knitr::kable(combined_summary)
 ```
+
+```{r 'function definition', echo=FALSE}
+# Extract the data table of one module from a FastQC 'fastqc_data.txt' file.
+#
+# fastqc_data:  path of the file to read.
+# module_name:  pattern (handed to grep) that identifies the module's first line;
+#               when several lines match, the first one is used.
+# header, comment.char: forwarded unchanged to read.csv.
+#
+# Returns the data.frame parsed from the tab-separated lines found between the
+# matched line and the next line containing 'END_MODULE' (an empty data.frame
+# when there are no lines in between). Stops with an error when the module or
+# its 'END_MODULE' marker cannot be found.
+extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
+  f = readLines(fastqc_data)
+  start_line = grep(module_name, f)
+  if (length(start_line) == 0) {
+    stop('Module "', module_name, '" not found in ', fastqc_data, call. = FALSE)
+  }
+  # keep a single start position so the comparison below is not recycled
+  start_line = start_line[1]
+  end_module_lines = grep('END_MODULE', f)
+  end_line = end_module_lines[end_module_lines > start_line][1]
+  if (is.na(end_line)) {
+    stop('No END_MODULE marker after module "', module_name, '" in ', fastqc_data, call. = FALSE)
+  }
+  # seq_len() yields an empty index for an empty module, whereas
+  # (start_line+1):(end_line-1) would count downwards and pick up marker lines
+  module_data = f[start_line + seq_len(end_line - start_line - 1)]
+  if (length(module_data) == 0) {
+    return(data.frame())
+  }
+  # parse in memory; a fixed '/tmp/temp.txt' is shared between concurrent runs
+  read.csv(text = module_data, sep = '\t', header = header, comment.char = comment.char)
+}
+```
--- a/02_per_base_sequence_quality.Rmd	Tue Feb 27 10:39:23 2018 -0500
+++ b/02_per_base_sequence_quality.Rmd	Fri Mar 09 17:47:50 2018 -0500
@@ -11,20 +11,7 @@
 ```
 
 
-```{r 'function definition', echo=FALSE}
-# Define a function to extract outputs for each module from fastqc output
-extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
-  f = readLines(fastqc_data)
-  start_line = grep(module_name, f)
-  end_module_lines = grep('END_MODULE', f)
-  end_line = end_module_lines[which(end_module_lines > start_line)[1]]
-  module_data = f[(start_line+1):(end_line-1)]
-  writeLines(module_data, '/tmp/temp.txt')
-  read.csv('/tmp/temp.txt', sep = '\t', header = header, comment.char = comment.char)
-}
-```
-
-# Per base sequence quality
+### Per base sequence quality
 
 ```{r 'per base sequence quality', fig.width=10}
 ## reads 1
@@ -50,13 +37,14 @@
                              fill = "yellow"),
                stat = 'identity') +
   geom_line(mapping = aes(x = id, y = Mean, color = "red")) +
-  scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
+  scale_x_continuous(name = 'Position in read (bp)', breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
+  scale_y_continuous(limits = c(0, max(comb_pbsq$Upper.Quartile) + 5)) +
   scale_fill_identity() +
   scale_color_identity() + 
-  ylim(0, max(comb_pbsq$Upper.Quartile) + 5) +
-  xlab('Position in read (bp)') +
   facet_grid(. ~ trim) +
-  theme(axis.text.x = element_text(angle=45))
+  theme(axis.text.x = element_text(size = 5),
+        panel.background = element_rect(fill = NA),
+        panel.grid.major.y = element_line(color = 'blue', size = 0.1))
 p
+```
 
-```
--- a/03_per_tile_sequence_quality.Rmd	Tue Feb 27 10:39:23 2018 -0500
+++ b/03_per_tile_sequence_quality.Rmd	Fri Mar 09 17:47:50 2018 -0500
@@ -10,7 +10,7 @@
 )
 ```
 
-# Per tile sequence quality
+### Per tile sequence quality
 
 ```{r 'per tile sequence quality', fig.width=10}
 ## check if 'per tile sequence quality' module exists or not
@@ -29,16 +29,19 @@
   comb_ptsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base)
   
   # convert integers to characters
-  comb_ptsq$Tile = as.character(comb_ptsq$X.Tile)
+  # comb_ptsq$Tile = as.character(comb_ptsq$X.Tile)
   
-  p = ggplot(data = comb_ptsq, aes(x = Base, y = Tile, fill = Mean)) +
-    geom_raster() + 
+  p = ggplot(data = comb_ptsq) +
+    geom_raster(mapping = aes(x = Base, y = X.Tile, fill = Mean)) + 
     facet_grid(. ~ trim) + 
-    xlab('Position in read (bp)') + 
-    ylab('') +
-    theme(axis.text.x = element_text(angle=45))
+    scale_x_discrete(name = "Position in read (bp)") +
+    scale_y_continuous(name = "") +
+    scale_fill_gradient(low = "blue", high = "red") +
+    theme(axis.text.x = element_text(size = 5, angle = 90),
+          axis.text.y = element_text(size = 5),
+        panel.background = element_rect(fill = NA))
   ggplotly(p)
 } else {
   print('No "per tile sequence quality" data')
 }
-```
\ No newline at end of file
+```
--- a/04_per_sequence_quality_score.Rmd	Tue Feb 27 10:39:23 2018 -0500
+++ b/04_per_sequence_quality_score.Rmd	Fri Mar 09 17:47:50 2018 -0500
@@ -11,7 +11,7 @@
 ```
 
 
-# Per sequence quality score
+### Per sequence quality score
 
 ```{r 'Per sequence quality score', fig.width=10}
 ## reads 1
@@ -25,11 +25,15 @@
 comb_psqs = rbind(psqs_1, psqs_2)
 comb_psqs$trim = factor(levels = c('before', 'after'), comb_psqs$trim)
 
-p = ggplot(data = comb_psqs, aes(x = X.Quality, y = Count)) + 
-  geom_line(color = 'red') + 
+p = ggplot(data = comb_psqs) + 
+  geom_line(mapping = aes(x = X.Quality, y = Count), color = 'red') + 
   facet_grid(. ~ trim) + 
-  xlim(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality)) + 
-  xlab('Mean Sequence Qaulity (Phred Score)') + 
-  ylab('')
-ggplotly(p)
+  scale_x_continuous(name = 'Mean Sequence Qaulity (Phred Score)',
+                     limits = c(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality))) +
+  scale_y_continuous(name = '') +
+  theme(panel.background = element_rect(fill = NA),
+        axis.line = element_line(),
+        panel.grid.major.y = element_line(color = 'blue', size = 0.1))
+p
 ```
+
--- a/05_per_base_sequence_content.Rmd	Tue Feb 27 10:39:23 2018 -0500
+++ b/05_per_base_sequence_content.Rmd	Fri Mar 09 17:47:50 2018 -0500
@@ -11,7 +11,7 @@
 ```
 
 
-# Per base sequence content
+### Per base sequence content
 
 ```{r 'Per base sequence content', fig.width=10}
 ## reads 1
@@ -32,12 +32,14 @@
 comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2)
 comb_pbsc$trim = factor(levels = c('before', 'after'), comb_pbsc$trim)
 
-p = ggplot(data = comb_pbsc, aes(x = id, y = value, color = variable)) +
-  geom_line() +
+p = ggplot(data = comb_pbsc) +
+  geom_line(mapping = aes(x = id, y = value, color = variable)) +
   facet_grid(. ~ trim) +
   xlim(min(comb_pbsc$id), max(comb_pbsc$id)) + 
   ylim(0, 100) +
   xlab('Position in read (bp)') +
-  ylab('')
+  ylab('') +
+  scale_color_discrete(name = '') +
+  theme_classic()
 ggplotly(p)
-```
\ No newline at end of file
+```
--- a/06_per_sequence_gc_content.Rmd	Tue Feb 27 10:39:23 2018 -0500
+++ b/06_per_sequence_gc_content.Rmd	Fri Mar 09 17:47:50 2018 -0500
@@ -10,7 +10,7 @@
 )
 ```
 
-# Per sequence GC content
+### Per sequence GC content
 
 ```{r 'Per sequence GC content', fig.width=10}
 ## reads 1
@@ -28,6 +28,8 @@
   geom_line(color = 'red') +
   facet_grid(. ~ trim) +
   xlab('Mean Sequence Qaulity (Phred Score)') +
-  ylab('')
+  ylab('') +
+  scale_color_discrete(name = '') +
+  theme_classic()
 ggplotly(p)
-```
\ No newline at end of file
+```
--- a/07_per_base_n_content.Rmd	Tue Feb 27 10:39:23 2018 -0500
+++ b/07_per_base_n_content.Rmd	Fri Mar 09 17:47:50 2018 -0500
@@ -10,7 +10,7 @@
 )
 ```
 
-# Per base N content
+### Per base N content
 
 ```{r 'Per base N content', fig.width=10}
 ## reads 1
@@ -33,6 +33,8 @@
   ylim(0, 1) + 
   xlab('N-Count') +
   ylab('') + 
-  theme(axis.text.x = element_text(angle=45))
+  theme(axis.text.x = element_text(size = 5),
+        axis.line = element_line(),
+        panel.background = element_rect(fill = NA))
 ggplotly(p)
-```
\ No newline at end of file
+```
--- a/08_sequence_length_distribution.Rmd	Tue Feb 27 10:39:23 2018 -0500
+++ b/08_sequence_length_distribution.Rmd	Fri Mar 09 17:47:50 2018 -0500
@@ -10,7 +10,7 @@
 )
 ```
 
-# Sequence Length Distribution
+### Sequence Length Distribution
 
 ```{r 'Sequence Length Distribution', fig.width=10}
 ## reads 1
@@ -32,6 +32,9 @@
   facet_grid(. ~ trim) +
   xlab('Sequence Length (bp)') +
   ylab('') + 
-  theme(axis.text.x = element_text(angle=45))
+  theme(axis.text.x = element_text(size = 5),
+        panel.background = element_rect(fill = NA),
+        axis.line = element_line(), 
+        plot.margin = margin(2,2,2,10) )
 ggplotly(p)
 ```
\ No newline at end of file
--- a/09_sequence_duplication_levels.Rmd	Tue Feb 27 10:39:23 2018 -0500
+++ b/09_sequence_duplication_levels.Rmd	Fri Mar 09 17:47:50 2018 -0500
@@ -11,7 +11,7 @@
 ```
 
 
-# Sequence Duplication Levels
+### Sequence Duplication Levels
 
 ```{r 'Sequence Duplication Levels', fig.width=10}
 ## reads 1
@@ -34,12 +34,15 @@
 comb_sdl = rbind(melt_sdl_1, melt_sdl_2)
 comb_sdl$trim = factor(levels = c('before', 'after'), comb_sdl$trim)
 
-p = ggplot(data = comb_sdl, aes(x = id, y = value, color = variable)) +
-  geom_line() +
+p = ggplot(data = comb_sdl) +
+  geom_line(mapping = aes(x = id, y = value, color = variable)) +
   scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) +
   facet_grid(. ~ trim) +
   xlab('Sequence Duplication Level') +
   ylab('') + 
-  theme(axis.text.x = element_text(angle=45))
-ggplotly(p)
-```
\ No newline at end of file
+  scale_color_discrete(name = '') +
+  theme(axis.text.x = element_text(size = 5),
+        panel.background = element_rect(fill = NA),
+        axis.line = element_line())
+p
+```
--- a/10_adapter_content.Rmd	Tue Feb 27 10:39:23 2018 -0500
+++ b/10_adapter_content.Rmd	Fri Mar 09 17:47:50 2018 -0500
@@ -10,7 +10,7 @@
 )
 ```
 
-# Adapter Content
+### Adapter Content
 
 ```{r 'Adapter Content', fig.width=10}
 ## reads 1
@@ -36,6 +36,10 @@
   xlim(min(comb_ac$id), max(comb_ac$id)) + 
   ylim(0, 1) +
   xlab('Position in read (bp)') +
-  ylab('')
+  ylab('') +
+  scale_color_discrete(name = '') +
+  theme(axis.text.x = element_text(size = 5),
+        panel.background = element_rect(fill = NA),
+        axis.line = element_line())
 ggplotly(p)
-```
\ No newline at end of file
+```
--- a/11_kmer_content.Rmd	Tue Feb 27 10:39:23 2018 -0500
+++ b/11_kmer_content.Rmd	Fri Mar 09 17:47:50 2018 -0500
@@ -10,17 +10,18 @@
 )
 ```
 
-# Kmer Content {.tabset}
+### Kmer Content {.tabset}
 
-## Before
+#### Before
 
 ```{r 'Kmer Content (before)', fig.width=10}
 kc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Kmer Content')
 knitr::kable(kc_1)
 ```
 
-## After
+#### After
 ```{r 'Kmer Content (after)', fig.width=10}
 kc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Kmer Content')
 knitr::kable(kc_2)
 ```
+
--- a/_site.yml	Tue Feb 27 10:39:23 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-name: "FastQC Website"
-output_dir: "../_site"
-navbar:
-    title: "FastQC"
-    type: inverse
-    left:
-        - text: "Home"
-          icon: fa-home
-          href: index.html
-        - text: "Evaluation Overview"
-          href: 01_evaluation_overview.html
-        - text: "Evaluation by data module"
-          menu:
-            - text: "Per Base Sequence Quality"
-              href: 02_per_base_sequence_quality.html
-            - text: "Per Tile Sequence Quality"
-              href: 03_per_tile_sequence_quality.html
-            - text: "Per Sequence Quality Score"
-              href: 04_per_sequence_quality_score.html
-            - text: "Per Base Sequence Content"
-              href: 05_per_base_sequence_content.html
-            - text: "Per Sequence GC Content"
-              href: 06_per_sequence_gc_content.html
-            - text: "Per Base N Content"
-              href: 07_per_base_n_content.html
-            - text: "Sequence Length Distribution"
-              href: 08_sequence_length_distribution.html
-            - text: "Sequence Duplication Levels"
-              href: 09_sequence_duplication_levels.html
-            - text: "Adapter Content"
-              href: 10_adapter_content.html
-            - text: "Kmer Content"
-              href: 11_kmer_content.html
-output:
-  html_document:
-    theme: cosmo
-    highlight: textmate
\ No newline at end of file
--- a/fastqc_site.xml	Tue Feb 27 10:39:23 2018 -0500
+++ b/fastqc_site.xml	Fri Mar 09 17:47:50 2018 -0500
@@ -1,4 +1,4 @@
-<tool name="Fastqc site" id='aurora_fastqc_site' version="2.1.0">
+<tool name="Aurora Fastqc site" id='aurora_fastqc_site' version="2.1.0">
     <description>
         Evaluate short reads with FastQC software on a single reads file or a pair of untrimmed and trimmed
         reads files.
@@ -55,7 +55,7 @@
                label="Display analysis code in report?"/>
     </inputs>
     <outputs>
-        <data format="html" name="report" label="fastqc site"/>
+        <data format="html" name="report" label="aurora fastqc site"/>
         <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
     </outputs>
     <citations>