Mercurial > repos > mingchen0919 > aurora_fastqc_site
view 01_evaluation_overview.Rmd @ 4:acb9908e68e4 draft
2.1.1
| author | mingchen0919 |
|---|---|
| date | Tue, 13 Mar 2018 11:05:21 -0400 |
| parents | b9c9d1dacda6 |
| children | 53b851fa90de |
line wrap: on
line source
---
title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
output:
  html_document:
    number_sections: true
    toc: true
    theme: cosmo
    highlight: tango
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Global chunk options. `opt` is not defined in this document; it is expected
# to be supplied by whatever script renders it (NOTE(review): confirm).
# Code echoing is driven by opt$X_e; errors are shown in the report rather
# than aborting the render.
knitr::opts_chunk$set(
  echo = as.logical(opt$X_e),
  error = TRUE,
  eval = TRUE
)
```


# Run FastQC

```{bash eval=TRUE,echo=FALSE}
# Work inside the output directory and copy both inputs to fixed names so
# that the FastQC outputs are always read_1_fastqc.* and read_2_fastqc.*.
cd ${X_d}
cp ${X_r} ${X_d}/read_1.fq
cp ${X_R} ${X_d}/read_2.fq

# Write a job-script template. The heredoc delimiter is unquoted, so the
# ${...} variables are expanded now and each '\\' becomes a single '\'
# (a line continuation in the generated script).
cat >temp.sh <<EOL
fastqc \\
  -q \\
  -c ${X_c} \\
  -l ${X_l} \\
  ${X_d}/read_1.fq > /dev/null 2>&1

fastqc \\
  -q \\
  -c ${X_c} \\
  -l ${X_l} \\
  ${X_d}/read_2.fq > /dev/null 2>&1
EOL

# Drop every line that contains 'None' (presumably options that were left
# unset by the caller -- TODO confirm) to obtain the final job script.
grep -v None temp.sh > fastqc.sh

# run fastqc
sh fastqc.sh

# unzip outputs
unzip -q read_1_fastqc.zip
unzip -q read_2_fastqc.zip
```

```{r}
# display fastqc job script
# `tags` is assumed to be htmltools::tags, attached by the rendering script.
fastqc_sh = paste0(opt$X_d, '/fastqc.sh')
tags$code(tags$pre(readChar(fastqc_sh, file.info(fastqc_sh)$size)))
```

# Raw FastQC reports

## Before trimming

```{r eval=TRUE}
# Links to the raw FastQC outputs of read_1 (paths are relative to the report).
ori_html = tags$a(href = 'read_1_fastqc/fastqc_report.html', opt$X_n)
ori_fastqc_data = tags$a(href = 'read_1_fastqc/fastqc_data.txt', 'fastqc_data.txt')
ori_summary = tags$a(href = 'read_1_fastqc/summary.txt', 'summary.txt')
tags$ul(
  tags$li(ori_html),
  tags$li(ori_fastqc_data),
  tags$li(ori_summary)
)
```

## After trimming

```{r eval=TRUE}
# Links to the raw FastQC outputs of read_2 (paths are relative to the report).
ori_html = tags$a(href = 'read_2_fastqc/fastqc_report.html', opt$X_n)
ori_fastqc_data = tags$a(href = 'read_2_fastqc/fastqc_data.txt', 'fastqc_data.txt')
ori_summary = tags$a(href = 'read_2_fastqc/summary.txt', 'summary.txt')
tags$ul(
  tags$li(ori_html),
  tags$li(ori_fastqc_data),
  tags$li(ori_summary)
)
```


# Fastqc Output Visualization

## Overview

```{r eval=TRUE}
# Build a side-by-side table from the two summary.txt files.
# For read_1 the second and first columns are kept (in that order) and become
# MODULE and Pre-trimming; for read_2 only the first column is kept and
# becomes Post-trimming. Rows are paired by position, not by module name.
read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'),
                          stringsAsFactors = FALSE,
                          header = FALSE, sep = '\t')[, 2:1]
read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'),
                          stringsAsFactors = FALSE,
                          header = FALSE, sep = '\t')[, 1]
combined_summary = data.frame(read_1_summary, read_2_summary, stringsAsFactors = FALSE)
names(combined_summary) = c('MODULE', 'Pre-trimming', 'Post-trimming')

# Make failures and warnings stand out in the rendered table.
combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)'
combined_summary[combined_summary == 'WARN'] = 'WARN (!)'
knitr::kable(combined_summary)
```

```{r 'function definition', echo=FALSE}
# Extract one module's table from a FastQC `fastqc_data.txt` file.
#
# The module is located by the first line matching `module_name`; its rows
# are the lines between that line and the next line containing 'END_MODULE'.
#
# Arguments:
#   fastqc_data   path to the fastqc_data.txt file.
#   module_name   pattern (passed to grep) identifying the module's first line.
#   header        passed to read.csv; whether the first extracted line holds
#                 the column names.
#   comment.char  passed to read.csv; the default "" disables comment
#                 handling so lines starting with '#' are kept.
#
# Returns a data.frame with the module's rows, or an empty data.frame when
# the module contains no lines. Stops with an error when the module or its
# END_MODULE marker cannot be found.
extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
  f = readLines(fastqc_data)

  start_line = grep(module_name, f)
  if (length(start_line) == 0) {
    stop("module '", module_name, "' not found in ", fastqc_data, call. = FALSE)
  }
  # Several lines may match the pattern; the module starts at the first one.
  start_line = start_line[1]

  end_module_lines = grep('END_MODULE', f)
  end_line = end_module_lines[end_module_lines > start_line][1]
  if (is.na(end_line)) {
    stop("no END_MODULE marker after module '", module_name, "' in ",
         fastqc_data, call. = FALSE)
  }

  # seq_len() yields an empty index for a module without content, whereas
  # (start_line + 1):(end_line - 1) would count downwards and pick up the
  # END_MODULE and module header lines.
  module_data = f[start_line + seq_len(end_line - start_line - 1)]
  if (length(module_data) == 0) {
    return(data.frame())
  }

  # Parse straight from memory instead of going through a fixed file in /tmp,
  # which is not portable and could be clobbered by a concurrent run.
  read.csv(text = module_data, sep = '\t', header = header, comment.char = comment.char)
}
```
