1
|
---
title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
output:
  html_document:
    number_sections: true
    toc: true
    theme: cosmo
    highlight: tango
---
|
0
|
10
|
1
|
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
|
|
# Report-wide chunk defaults: every chunk is evaluated, chunk errors are kept
# in the output (error = TRUE), and whether the code itself is shown is
# taken from the X_e option.
knitr::opts_chunk$set(
  eval = TRUE,
  error = TRUE,
  echo = as.logical(opt$X_e)
)
|
|
17 ```
|
0
|
18
|
|
19 # Run FastQC
|
|
20
|
|
21 ```{bash eval=TRUE,echo=FALSE}
|
|
# Work inside the output directory; stop here if it is not reachable so the
# following commands never run in the wrong place.
cd "${X_d}" || exit 1

# Copy both input read files under fixed names so the FastQC outputs get
# predictable names (read_1_fastqc.zip, read_2_fastqc.zip).
cp "${X_r}" "${X_d}/read_1.fq"
cp "${X_R}" "${X_d}/read_2.fq"

# Generate the job script. The heredoc delimiter is deliberately unquoted:
# variables are expanded now, and each "\\" is written out as a single "\"
# line continuation. Values are wrapped in double quotes in the generated
# script so paths containing spaces survive.
cat >temp.sh <<EOL
fastqc \\
  -q \\
  -c "${X_c}" \\
  -l "${X_l}" \\
  "${X_d}/read_1.fq" > /dev/null 2>&1

fastqc \\
  -q \\
  -c "${X_c}" \\
  -l "${X_l}" \\
  "${X_d}/read_2.fq" > /dev/null 2>&1
EOL

# Drop every line containing "None" (presumably the placeholder for an
# option that was not supplied - confirm against the caller).
# NOTE(review): this also drops a read-file line if its path contains "None".
grep -v None temp.sh > fastqc.sh

# run fastqc
sh fastqc.sh

# unzip outputs; -o overwrites without prompting so re-runs do not stall
unzip -o -q read_1_fastqc.zip
unzip -o -q read_2_fastqc.zip
|
|
48 ```
|
|
49
|
|
50 ```{r}
|
|
# Show the generated FastQC job script verbatim in the report: read the
# whole file as one string and wrap it in <code><pre>.
fastqc_sh <- paste0(opt$X_d, "/fastqc.sh")
tags$code(
  tags$pre(
    readChar(fastqc_sh, nchars = file.size(fastqc_sh))
  )
)
|
|
54 ```
|
|
55
|
|
56 # Raw FastQC reports
|
|
57
|
|
58 ## Before trimming
|
|
59 ```{r eval=TRUE}
|
|
# Bullet list of links to the FastQC outputs unpacked for read_1:
# the HTML report (labelled with the X_n option) and the two raw text files.
ori_html <- tags$a(opt$X_n, href = "read_1_fastqc/fastqc_report.html")
ori_fastqc_data <- tags$a("fastqc_data.txt", href = "read_1_fastqc/fastqc_data.txt")
ori_summary <- tags$a("summary.txt", href = "read_1_fastqc/summary.txt")

tags$ul(
  tags$li(ori_html),
  tags$li(ori_fastqc_data),
  tags$li(ori_summary)
)
|
|
68 ```
|
|
69
|
|
70 ## After trimming
|
|
71 ```{r eval=TRUE}
|
|
# Bullet list of links to the FastQC outputs unpacked for read_2:
# the HTML report (labelled with the X_n option) and the two raw text files.
ori_html <- tags$a(opt$X_n, href = "read_2_fastqc/fastqc_report.html")
ori_fastqc_data <- tags$a("fastqc_data.txt", href = "read_2_fastqc/fastqc_data.txt")
ori_summary <- tags$a("summary.txt", href = "read_2_fastqc/summary.txt")

tags$ul(
  tags$li(ori_html),
  tags$li(ori_fastqc_data),
  tags$li(ori_summary)
)
|
|
80 ```
|
|
81
|
|
82
|
|
# FastQC Output Visualization
|
|
84
|
|
85 ## Overview
|
|
86
|
|
87 ```{r eval=TRUE}
|
3
|
# Read the tab-separated summary.txt of each FastQC run.
# For read_1 keep columns 2 and 1, in that order (they become MODULE and
# Pre-trimming below); for read_2 keep only column 1 (Post-trimming).
read_1_summary <- read.csv(
  paste0(opt$X_d, "/read_1_fastqc/summary.txt"),
  sep = "\t",
  header = FALSE,
  stringsAsFactors = FALSE
)[, 2:1]
read_2_summary <- read.csv(
  paste0(opt$X_d, "/read_2_fastqc/summary.txt"),
  sep = "\t",
  header = FALSE,
  stringsAsFactors = FALSE
)[, 1]

# Put both runs side by side; rows are paired by position.
combined_summary <- data.frame(
  read_1_summary,
  read_2_summary,
  stringsAsFactors = FALSE
)
names(combined_summary) <- c("MODULE", "Pre-trimming", "Post-trimming")

# Make failures and warnings stand out in the rendered table.
combined_summary[combined_summary == "FAIL"] <- "FAIL (X)"
combined_summary[combined_summary == "WARN"] <- "WARN (!)"

knitr::kable(combined_summary)
|
|
99 ```
|
3
|
100
|
|
101 ```{r 'function definition', echo=FALSE}
|
|
#' Extract one module's table from a FastQC data file.
#'
#' Reads the file line by line, locates the first line matching
#' `module_name`, and parses every line between it and the next line
#' containing "END_MODULE" as tab-separated values.
#'
#' @param fastqc_data Path to the text file to read (passed to `readLines`).
#' @param module_name Pattern identifying the module's opening line. It is
#'   handed to `grep()`, so it is treated as a regular expression; when it
#'   matches several lines, the first match is used.
#' @param header Passed to `read.csv`: whether the first line of the module
#'   body holds column names.
#' @param comment.char Passed to `read.csv`. The default "" disables comment
#'   handling, so body lines starting with "#" are parsed as data/header.
#' @return A data frame with the module's rows; an empty data frame when the
#'   module has no lines between its opening line and "END_MODULE".
extract_data_module <- function(fastqc_data, module_name, header = TRUE, comment.char = "") {
  f <- readLines(fastqc_data)

  start_line <- grep(module_name, f)
  if (length(start_line) == 0) {
    stop("module '", module_name, "' not found in ", fastqc_data, call. = FALSE)
  }
  # Several lines may match the pattern; the module starts at the first one.
  start_line <- start_line[1]

  end_module_lines <- grep("END_MODULE", f)
  end_line <- end_module_lines[end_module_lines > start_line][1]
  if (is.na(end_line)) {
    stop("no END_MODULE line after module '", module_name, "' in ",
         fastqc_data, call. = FALSE)
  }

  # An empty module would make (start_line + 1):(end_line - 1) count
  # backwards and pick up the wrong lines, so handle it explicitly.
  if (end_line - start_line < 2) {
    return(data.frame())
  }
  module_data <- f[seq(start_line + 1, end_line - 1)]

  # Parse straight from memory instead of going through a shared file in
  # /tmp, which is not portable and can collide between concurrent runs.
  read.csv(text = module_data, sep = "\t", header = header,
           comment.char = comment.char)
}
|
|
111 ```
|