1
|
1 ---
|
|
2 title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
|
|
3 output:
|
|
4 html_document:
|
|
5 number_sections: true
|
|
6 toc: true
|
|
7 theme: cosmo
|
|
8 highlight: tango
|
|
9 ---
|
0
|
10
|
1
|
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
|
|
# Report-wide chunk defaults: chunks are evaluated, errors are captured in the
# output (error = TRUE), and whether code is echoed follows the X_e option.
knitr::opts_chunk$set(eval = TRUE, error = TRUE, echo = as.logical(opt$X_e))
|
|
17 ```
|
0
|
18
|
|
19 # Run FastQC
|
|
20
|
|
21 ```{bash eval=TRUE,echo=FALSE}
|
|
# Work inside the job directory; stop here if it cannot be entered so that
# none of the following steps run in the wrong place.
cd "${X_d}" || exit 1

# Stage both inputs under fixed names so the FastQC outputs are always called
# read_1_fastqc.zip and read_2_fastqc.zip. Paths are quoted so that spaces or
# glob characters in them do not split the arguments.
cp "${X_r}" "${X_d}/read_1.fq"
cp "${X_R}" "${X_d}/read_2.fq"

mkdir -p read_1 read_2

# Generate the job script. The heredoc delimiter is deliberately unquoted:
# variables are expanded now, and each '\\' is written out as a single
# line-continuation backslash.
cat >temp.sh <<EOL
fastqc \\
  -q \\
  -c "${X_c}" \\
  -l "${X_l}" \\
  "${X_d}/read_1.fq" > /dev/null 2>&1

fastqc \\
  -q \\
  -c "${X_c}" \\
  -l "${X_l}" \\
  "${X_d}/read_2.fq" > /dev/null 2>&1
EOL

# Remove every line containing "None" (presumably options that were left
# unset arrive as the literal string None - confirm against the caller).
# NOTE(review): this also drops the input-file line if the path itself
# contains "None".
grep -v None temp.sh > fastqc.sh

# run fastqc
sh fastqc.sh

# unzip outputs (-o: overwrite without prompting, so a re-run in the same
# directory cannot block on an interactive question)
unzip -q -o read_1_fastqc.zip
unzip -q -o read_2_fastqc.zip
|
|
49 ```
|
|
50
|
|
51 ```{r}
|
|
# Show the generated FastQC job script verbatim in the report: read the whole
# file as a single string and wrap it in <code><pre> tags.
fastqc_sh <- paste0(opt$X_d, '/fastqc.sh')
tags$code(
  tags$pre(
    readChar(fastqc_sh, nchars = file.size(fastqc_sh))
  )
)
|
|
55 ```
|
|
56
|
|
57 # Raw FastQC reports
|
|
58
|
|
59 ## Before trimming
|
|
60 ```{r eval=TRUE}
|
|
# Hyperlinks (relative hrefs) to the three FastQC artefacts unpacked into
# read_1_fastqc/; the HTML report link is labelled with the X_n option.
ori_summary <- tags$a('summary.txt', href = 'read_1_fastqc/summary.txt')
ori_fastqc_data <- tags$a('fastqc_data.txt', href = 'read_1_fastqc/fastqc_data.txt')
ori_html <- tags$a(opt$X_n, href = 'read_1_fastqc/fastqc_report.html')

# Render the links as a bullet list: report first, then data, then summary.
tags$ul(
  tags$li(ori_html),
  tags$li(ori_fastqc_data),
  tags$li(ori_summary)
)
|
|
69 ```
|
|
70
|
|
71 ## After trimming
|
|
72 ```{r eval=TRUE}
|
|
# Hyperlinks (relative hrefs) to the three FastQC artefacts unpacked into
# read_2_fastqc/; the HTML report link is labelled with the X_n option.
ori_summary <- tags$a('summary.txt', href = 'read_2_fastqc/summary.txt')
ori_fastqc_data <- tags$a('fastqc_data.txt', href = 'read_2_fastqc/fastqc_data.txt')
ori_html <- tags$a(opt$X_n, href = 'read_2_fastqc/fastqc_report.html')

# Render the links as a bullet list: report first, then data, then summary.
tags$ul(
  tags$li(ori_html),
  tags$li(ori_fastqc_data),
  tags$li(ori_summary)
)
|
|
81 ```
|
|
82
|
|
83
|
|
84 # FastQC Output Visualization
|
|
85
|
|
86 ## Overview
|
|
87
|
|
88 ```{r eval=TRUE}
|
3
|
# summary.txt is read as a tab-separated table without a header row.
# For read 1, columns 2 and 1 are kept in that order (used below as the module
# name and the pre-trimming status); for read 2 only column 1 is kept.
read_1_summary <- read.delim(paste0(opt$X_d, '/read_1_fastqc/summary.txt'),
                             header = FALSE,
                             stringsAsFactors = FALSE)[, 2:1]
read_2_summary <- read.delim(paste0(opt$X_d, '/read_2_fastqc/summary.txt'),
                             header = FALSE,
                             stringsAsFactors = FALSE)[, 1]

# Put both runs side by side, one row per module.
combined_summary <- setNames(
  data.frame(read_1_summary, read_2_summary, stringsAsFactors = FALSE),
  c('MODULE', 'Pre-trimming', 'Post-trimming')
)

# Decorate the non-passing statuses so they stand out in the table.
combined_summary[combined_summary == 'WARN'] <- 'WARN (!)'
combined_summary[combined_summary == 'FAIL'] <- 'FAIL (X)'
knitr::kable(combined_summary)
|
|
100 ```
|
3
|
101
|
|
102 ```{r 'function definition', echo=FALSE}
|
|
# Extract the table of one module from a FastQC 'fastqc_data.txt' file.
#
# Arguments:
#   fastqc_data   Path to the fastqc_data.txt file.
#   module_name   Pattern (as interpreted by grep) identifying the line that
#                 opens the module; if several lines match, the first is used.
#   header        Passed to read.csv: is the module's first line a header?
#   comment.char  Passed to read.csv.
#
# Returns a data frame built from the lines strictly between the module's
# opening line and the next line containing 'END_MODULE'; an empty data frame
# if there are no lines in between.
# Stops with an error if the module or its END_MODULE marker cannot be found.
extract_data_module <- function(fastqc_data, module_name, header = TRUE, comment.char = "") {
  f <- readLines(fastqc_data)

  start_line <- grep(module_name, f)
  if (length(start_line) == 0) {
    stop("module '", module_name, "' not found in ", fastqc_data, call. = FALSE)
  }
  # Keep a single start position even when the pattern matches several lines.
  start_line <- start_line[1]

  end_module_lines <- grep('END_MODULE', f)
  end_line <- end_module_lines[end_module_lines > start_line][1]
  if (is.na(end_line)) {
    stop("no END_MODULE found after module '", module_name, "' in ",
         fastqc_data, call. = FALSE)
  }

  # Number of data lines between the two markers; guard against an empty
  # module, where (start_line + 1):(end_line - 1) would count backwards.
  n_lines <- end_line - start_line - 1
  if (n_lines < 1) {
    return(data.frame())
  }
  module_data <- f[seq(start_line + 1, length.out = n_lines)]

  # Parse directly from memory instead of going through a fixed file in /tmp,
  # which is not portable and can clash between concurrent runs.
  read.csv(text = module_data, sep = '\t', header = header, comment.char = comment.char)
}
|
|
112 ```
|