Mercurial > repos > mingchen0919 > aurora_fastqc_site
changeset 3:b9c9d1dacda6 draft
reformat code
author | mingchen0919 |
---|---|
date | Fri, 09 Mar 2018 17:47:50 -0500 |
parents | 7abe0e473013 |
children | acb9908e68e4 |
files | 01_evaluation_overview.Rmd 02_per_base_sequence_quality.Rmd 03_per_tile_sequence_quality.Rmd 04_per_sequence_quality_score.Rmd 05_per_base_sequence_content.Rmd 06_per_sequence_gc_content.Rmd 07_per_base_n_content.Rmd 08_sequence_length_distribution.Rmd 09_sequence_duplication_levels.Rmd 10_adapter_content.Rmd 11_kmer_content.Rmd _site.yml fastqc_site.xml |
diffstat | 13 files changed, 93 insertions(+), 102 deletions(-) [+] |
line wrap: on
line diff
--- a/01_evaluation_overview.Rmd Tue Feb 27 10:39:23 2018 -0500 +++ b/01_evaluation_overview.Rmd Fri Mar 09 17:47:50 2018 -0500 @@ -85,11 +85,27 @@ ## Overview ```{r eval=TRUE} -read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 2:1] -read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 1] -combined_summary = cbind(read_1_summary, read_2_summary) -names(combined_summary) = c('MODULE', paste0(opt$X_n, '(before)'), paste0(opt$X_N, '(after)')) +read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'), + stringsAsFactors = FALSE, + header = FALSE, sep = '\t')[, 2:1] +read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'), + stringsAsFactors = FALSE, + header = FALSE, sep = '\t')[, 1] +combined_summary = data.frame(read_1_summary, read_2_summary, stringsAsFactors = FALSE) +names(combined_summary) = c('MODULE', 'Pre-trimming', 'Post-trimming') combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)' combined_summary[combined_summary == 'WARN'] = 'WARN (!)' knitr::kable(combined_summary) ``` + +```{r 'function definition', echo=FALSE} +extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") { + f = readLines(fastqc_data) + start_line = grep(module_name, f) + end_module_lines = grep('END_MODULE', f) + end_line = end_module_lines[which(end_module_lines > start_line)[1]] + module_data = f[(start_line+1):(end_line-1)] + writeLines(module_data, '/tmp/temp.txt') + read.csv('/tmp/temp.txt', sep = '\t', header = header, comment.char = comment.char) +} +```
--- a/02_per_base_sequence_quality.Rmd Tue Feb 27 10:39:23 2018 -0500 +++ b/02_per_base_sequence_quality.Rmd Fri Mar 09 17:47:50 2018 -0500 @@ -11,20 +11,7 @@ ``` -```{r 'function definition', echo=FALSE} -# Define a function to extract outputs for each module from fastqc output -extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") { - f = readLines(fastqc_data) - start_line = grep(module_name, f) - end_module_lines = grep('END_MODULE', f) - end_line = end_module_lines[which(end_module_lines > start_line)[1]] - module_data = f[(start_line+1):(end_line-1)] - writeLines(module_data, '/tmp/temp.txt') - read.csv('/tmp/temp.txt', sep = '\t', header = header, comment.char = comment.char) -} -``` - -# Per base sequence quality +### Per base sequence quality ```{r 'per base sequence quality', fig.width=10} ## reads 1 @@ -50,13 +37,14 @@ fill = "yellow"), stat = 'identity') + geom_line(mapping = aes(x = id, y = Mean, color = "red")) + - scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) + + scale_x_continuous(name = 'Position in read (bp)', breaks = pbsq_2$id, labels = pbsq_2$X.Base) + + scale_y_continuous(limits = c(0, max(comb_pbsq$Upper.Quartile) + 5)) + scale_fill_identity() + scale_color_identity() + - ylim(0, max(comb_pbsq$Upper.Quartile) + 5) + - xlab('Position in read (bp)') + facet_grid(. ~ trim) + - theme(axis.text.x = element_text(angle=45)) + theme(axis.text.x = element_text(size = 5), + panel.background = element_rect(fill = NA), + panel.grid.major.y = element_line(color = 'blue', size = 0.1)) p +``` -```
--- a/03_per_tile_sequence_quality.Rmd Tue Feb 27 10:39:23 2018 -0500 +++ b/03_per_tile_sequence_quality.Rmd Fri Mar 09 17:47:50 2018 -0500 @@ -10,7 +10,7 @@ ) ``` -# Per tile sequence quality +### Per tile sequence quality ```{r 'per tile sequence quality', fig.width=10} ## check if 'per tile sequence quality' module exits or not @@ -29,16 +29,19 @@ comb_ptsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base) # convert integers to charaters - comb_ptsq$Tile = as.character(comb_ptsq$X.Tile) + # comb_ptsq$Tile = as.character(comb_ptsq$X.Tile) - p = ggplot(data = comb_ptsq, aes(x = Base, y = Tile, fill = Mean)) + - geom_raster() + + p = ggplot(data = comb_ptsq) + + geom_raster(mapping = aes(x = Base, y = X.Tile, fill = Mean)) + facet_grid(. ~ trim) + - xlab('Position in read (bp)') + - ylab('') + - theme(axis.text.x = element_text(angle=45)) + scale_x_discrete(name = "Position in read (bp)") + + scale_y_continuous(name = "") + + scale_fill_gradient(low = "blue", high = "red") + + theme(axis.text.x = element_text(size = 5, angle = 90), + axis.text.y = element_text(size = 5), + panel.background = element_rect(fill = NA)) ggplotly(p) } else { print('No "per tile sequence quality" data') } -``` \ No newline at end of file +```
--- a/04_per_sequence_quality_score.Rmd Tue Feb 27 10:39:23 2018 -0500 +++ b/04_per_sequence_quality_score.Rmd Fri Mar 09 17:47:50 2018 -0500 @@ -11,7 +11,7 @@ ``` -# Per sequence quality score +### Per sequence quality score ```{r 'Per sequence quality score', fig.width=10} ## reads 1 @@ -25,11 +25,15 @@ comb_psqs = rbind(psqs_1, psqs_2) comb_psqs$trim = factor(levels = c('before', 'after'), comb_psqs$trim) -p = ggplot(data = comb_psqs, aes(x = X.Quality, y = Count)) + - geom_line(color = 'red') + +p = ggplot(data = comb_psqs) + + geom_line(mapping = aes(x = X.Quality, y = Count), color = 'red') + facet_grid(. ~ trim) + - xlim(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality)) + - xlab('Mean Sequence Qaulity (Phred Score)') + - ylab('') -ggplotly(p) + scale_x_continuous(name = 'Mean Sequence Qaulity (Phred Score)', + limits = c(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality))) + + scale_y_continuous(name = '') + + theme(panel.background = element_rect(fill = NA), + axis.line = element_line(), + panel.grid.major.y = element_line(color = 'blue', size = 0.1)) +p ``` +
--- a/05_per_base_sequence_content.Rmd Tue Feb 27 10:39:23 2018 -0500 +++ b/05_per_base_sequence_content.Rmd Fri Mar 09 17:47:50 2018 -0500 @@ -11,7 +11,7 @@ ``` -# Per base sequence content +### Per base sequence content ```{r 'Per base sequence content', fig.width=10} ## reads 1 @@ -32,12 +32,14 @@ comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2) comb_pbsc$trim = factor(levels = c('before', 'after'), comb_pbsc$trim) -p = ggplot(data = comb_pbsc, aes(x = id, y = value, color = variable)) + - geom_line() + +p = ggplot(data = comb_pbsc) + + geom_line(mapping = aes(x = id, y = value, color = variable)) + facet_grid(. ~ trim) + xlim(min(comb_pbsc$id), max(comb_pbsc$id)) + ylim(0, 100) + xlab('Position in read (bp)') + - ylab('') + ylab('') + + scale_color_discrete(name = '') + + theme_classic() ggplotly(p) -``` \ No newline at end of file +```
--- a/06_per_sequence_gc_content.Rmd Tue Feb 27 10:39:23 2018 -0500 +++ b/06_per_sequence_gc_content.Rmd Fri Mar 09 17:47:50 2018 -0500 @@ -10,7 +10,7 @@ ) ``` -# Per sequence GC content +### Per sequence GC content ```{r 'Per sequence GC content', fig.width=10} ## reads 1 @@ -28,6 +28,8 @@ geom_line(color = 'red') + facet_grid(. ~ trim) + xlab('Mean Sequence Qaulity (Phred Score)') + - ylab('') + ylab('') + + scale_color_discrete(name = '') + + theme_classic() ggplotly(p) -``` \ No newline at end of file +```
--- a/07_per_base_n_content.Rmd Tue Feb 27 10:39:23 2018 -0500 +++ b/07_per_base_n_content.Rmd Fri Mar 09 17:47:50 2018 -0500 @@ -10,7 +10,7 @@ ) ``` -# Per base N content +### Per base N content ```{r 'Per base N content', fig.width=10} ## reads 1 @@ -33,6 +33,8 @@ ylim(0, 1) + xlab('N-Count') + ylab('') + - theme(axis.text.x = element_text(angle=45)) + theme(axis.text.x = element_text(size = 5), + axis.line = element_line(), + panel.background = element_rect(fill = NA)) ggplotly(p) -``` \ No newline at end of file +```
--- a/08_sequence_length_distribution.Rmd Tue Feb 27 10:39:23 2018 -0500 +++ b/08_sequence_length_distribution.Rmd Fri Mar 09 17:47:50 2018 -0500 @@ -10,7 +10,7 @@ ) ``` -# Sequence Length Distribution +### Sequence Length Distribution ```{r 'Sequence Length Distribution', fig.width=10} ## reads 1 @@ -32,6 +32,9 @@ facet_grid(. ~ trim) + xlab('Sequence Length (bp)') + ylab('') + - theme(axis.text.x = element_text(angle=45)) + theme(axis.text.x = element_text(size = 5), + panel.background = element_rect(fill = NA), + axis.line = element_line(), + plot.margin = margin(2,2,2,10) ) ggplotly(p) ``` \ No newline at end of file
--- a/09_sequence_duplication_levels.Rmd Tue Feb 27 10:39:23 2018 -0500 +++ b/09_sequence_duplication_levels.Rmd Fri Mar 09 17:47:50 2018 -0500 @@ -11,7 +11,7 @@ ``` -# Sequence Duplication Levels +### Sequence Duplication Levels ```{r 'Sequence Duplication Levels', fig.width=10} ## reads 1 @@ -34,12 +34,15 @@ comb_sdl = rbind(melt_sdl_1, melt_sdl_2) comb_sdl$trim = factor(levels = c('before', 'after'), comb_sdl$trim) -p = ggplot(data = comb_sdl, aes(x = id, y = value, color = variable)) + - geom_line() + +p = ggplot(data = comb_sdl) + + geom_line(mapping = aes(x = id, y = value, color = variable)) + scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) + facet_grid(. ~ trim) + xlab('Sequence Duplication Level') + ylab('') + - theme(axis.text.x = element_text(angle=45)) -ggplotly(p) -``` \ No newline at end of file + scale_color_discrete(name = '') + + theme(axis.text.x = element_text(size = 5), + panel.background = element_rect(fill = NA), + axis.line = element_line()) +p +```
--- a/10_adapter_content.Rmd Tue Feb 27 10:39:23 2018 -0500 +++ b/10_adapter_content.Rmd Fri Mar 09 17:47:50 2018 -0500 @@ -10,7 +10,7 @@ ) ``` -# Adapter Content +### Adapter Content ```{r 'Adapter Content', fig.width=10} ## reads 1 @@ -36,6 +36,10 @@ xlim(min(comb_ac$id), max(comb_ac$id)) + ylim(0, 1) + xlab('Position in read (bp)') + - ylab('') + ylab('') + + scale_color_discrete(name = '') + + theme(axis.text.x = element_text(size = 5), + panel.background = element_rect(fill = NA), + axis.line = element_line()) ggplotly(p) -``` \ No newline at end of file +```
--- a/11_kmer_content.Rmd Tue Feb 27 10:39:23 2018 -0500 +++ b/11_kmer_content.Rmd Fri Mar 09 17:47:50 2018 -0500 @@ -10,17 +10,18 @@ ) ``` -# Kmer Content {.tabset} +### Kmer Content {.tabset} -## Before +#### Before ```{r 'Kmer Content (before)', fig.width=10} kc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Kmer Content') knitr::kable(kc_1) ``` -## After +#### After ```{r 'Kmer Content (after)', fig.width=10} kc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Kmer Content') knitr::kable(kc_2) ``` +
--- a/_site.yml Tue Feb 27 10:39:23 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -name: "FastQC Website" -output_dir: "../_site" -navbar: - title: "FastQC" - type: inverse - left: - - text: "Home" - icon: fa-home - href: index.html - - text: "Evaluation Overview" - href: 01_evaluation_overview.html - - text: "Evaluation by data module" - menu: - - text: "Per Base Sequence Quality" - href: 02_per_base_sequence_quality.html - - text: "Per Tile Sequence Quality" - href: 03_per_tile_sequence_quality.html - - text: "Per Sequence Quality Score" - href: 04_per_sequence_quality_score.html - - text: "Per Base Sequence Content" - href: 05_per_base_sequence_content.html - - text: "Per Sequence GC Content" - href: 06_per_sequence_gc_content.html - - text: "Per Base N Content" - href: 07_per_base_n_content.html - - text: "Sequence Length Distribution" - href: 08_sequence_length_distribution.html - - text: "Sequence Duplication Levels" - href: 09_sequence_duplication_levels.html - - text: "Adapter Content" - href: 10_adapter_content.html - - text: "Kmer Content" - href: 11_kmer_content.html -output: - html_document: - theme: cosmo - highlight: textmate \ No newline at end of file
--- a/fastqc_site.xml Tue Feb 27 10:39:23 2018 -0500 +++ b/fastqc_site.xml Fri Mar 09 17:47:50 2018 -0500 @@ -1,4 +1,4 @@ -<tool name="Fastqc site" id='aurora_fastqc_site' version="2.1.0"> +<tool name="Aurora Fastqc site" id='aurora_fastqc_site' version="2.1.0"> <description> Evaluate short reads with FastQC software on a single or a paired of untrimmed and trimmed reads files. @@ -55,7 +55,7 @@ label="Display analysis code in report?"/> </inputs> <outputs> - <data format="html" name="report" label="fastqc site"/> + <data format="html" name="report" label="aurora fastqc site"/> <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> </outputs> <citations>