|
1
|
1 ---
|
|
|
2 title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
|
|
|
3 output:
|
|
|
4 html_document:
|
|
|
5 number_sections: true
|
|
|
6 toc: true
|
|
|
7 theme: cosmo
|
|
|
8 highlight: tango
|
|
|
9 ---
|
|
0
|
10
|
|
1
|
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
|
|
|
# Report-wide chunk defaults: code echo follows the X_e option, chunks are
# always evaluated, and errors are rendered in the report instead of aborting.
knitr::opts_chunk$set(eval = TRUE, error = TRUE, echo = as.logical(opt$X_e))
|
|
|
17 ```
|
|
0
|
18
|
|
|
19 # Run FastQC
|
|
|
20
|
|
7
|
21 ```{bash}
|
|
|
# Run the FastQC job script shipped with the tool. The path is quoted so an
# install directory containing spaces or glob characters is passed intact.
sh "${TOOL_INSTALL_DIR}/shell-script-template.sh"
|
|
0
|
23 ```
|
|
|
24
|
|
7
|
25 ```{r echo=FALSE,results='asis'}
|
|
0
|
# Embed the FastQC job script (fastqc.sh under REPORT_FILES_PATH) in the
# report as a fenced bash code block.
cat('```bash\n')
cat(
  paste(
    readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/fastqc.sh')),
    collapse = '\n'
  )
)
cat('\n```')
|
|
0
|
30 ```
|
|
|
31
|
|
|
32 # FastQC Output Visualization
|
|
|
33
|
|
|
34 ## Overview
|
|
|
35
|
|
|
36 ```{r eval=TRUE}
|
|
3
|
# Build the overview table from the two FastQC summary.txt files.
# Each file is tab-separated without a header; the first column holds the
# status and the second the module name.

# Read 1: keep module name first, then its status.
read_1_summary = read.delim(
  paste0(opt$X_d, '/read_1_fastqc/summary.txt'),
  header = FALSE,
  stringsAsFactors = FALSE
)[, c('V2', 'V1')]

# Read 2: only the status column is needed; rows are paired by position.
read_2_summary = read.delim(
  paste0(opt$X_d, '/read_2_fastqc/summary.txt'),
  header = FALSE,
  stringsAsFactors = FALSE
)[, 'V1']

combined_summary = data.frame(
  read_1_summary,
  read_2_summary,
  stringsAsFactors = FALSE
)
colnames(combined_summary) = c('MODULE', 'Pre-trimming', 'Post-trimming')

# Append a plain-text marker to failing and warning statuses.
combined_summary = replace(combined_summary, combined_summary == 'FAIL', 'FAIL (X)')
combined_summary = replace(combined_summary, combined_summary == 'WARN', 'WARN (!)')

knitr::kable(combined_summary)
|
|
|
48 ```
|
|
3
|
49
|
|
|
50 ```{r 'function definition', echo=FALSE}
|
|
|
# Extract the table of one module from a FastQC data report.
#
# A module section begins on the first line matching `module_name` and ends at
# the next line containing 'END_MODULE'. The lines strictly between the two are
# parsed as a tab-separated table.
#
# Args:
#   fastqc_data:  file (anything readLines() accepts) holding the FastQC data.
#   module_name:  regular expression identifying the module's start line; when
#                 several lines match, the first one is used.
#   header:       forwarded to read.csv(); TRUE treats the first module line
#                 as column names.
#   comment.char: forwarded to read.csv(); the default "" disables comment
#                 handling so lines starting with '#' are kept.
#
# Returns:
#   A data.frame with the module's rows; an empty data.frame when the module
#   section contains no lines between its start and END_MODULE markers.
#
# Errors:
#   Stops when no line matches `module_name` or when no 'END_MODULE' line
#   follows the module's start line.
extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
  report_lines = readLines(fastqc_data)

  start_line = grep(module_name, report_lines)
  if (length(start_line) == 0) {
    stop("no line matching module '", module_name, "' was found", call. = FALSE)
  }
  # Several matches would make the comparisons below vector-valued; keep the
  # first occurrence only.
  start_line = start_line[1]

  end_module_lines = grep('END_MODULE', report_lines)
  end_line = end_module_lines[end_module_lines > start_line][1]
  if (is.na(end_line)) {
    stop("no END_MODULE line follows module '", module_name, "'", call. = FALSE)
  }

  # An empty module has END_MODULE directly after its start line; without this
  # guard the index range below would run backwards and pick up marker lines.
  if (end_line - start_line < 2) {
    return(data.frame())
  }

  module_data = report_lines[(start_line + 1):(end_line - 1)]

  # Parse in memory rather than through a shared fixed-name temporary file.
  read.csv(
    text = paste(module_data, collapse = '\n'),
    sep = '\t',
    header = header,
    comment.char = comment.char
  )
}
|
|
|
60 ```
|