annotate 01_evaluation_overview.Rmd @ 3:b9c9d1dacda6 draft

reformat code
author mingchen0919
date Fri, 09 Mar 2018 17:47:50 -0500
parents 645291efd2e7
children 53b851fa90de
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
1 ---
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
2 title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
3 output:
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
4 html_document:
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
5 number_sections: true
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
6 toc: true
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
7 theme: cosmo
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
8 highlight: tango
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
9 ---
0
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
10
1
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
# Report-wide chunk defaults, passed to knitr as a single list:
#   echo  - whether chunk source is shown, taken from the X_e option
#   error - TRUE, so a failing chunk is reported in the output
#   eval  - TRUE, so every chunk is executed
knitr::opts_chunk$set(list(
  echo = as.logical(opt$X_e),
  error = TRUE,
  eval = TRUE
))
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
17 ```
0
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
18
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
19 # Run FastQC
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
20
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
21 ```{bash eval=TRUE,echo=FALSE}
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
# Work inside the output directory. Abort if it cannot be entered so that
# nothing below runs (and writes files) in the wrong place.
cd "${X_d}" || exit 1

# Stage the two inputs under fixed names. All expansions are quoted so that
# paths containing spaces or glob characters are passed through intact.
cp "${X_r}" "${X_d}/read_1.fq"
cp "${X_R}" "${X_d}/read_2.fq"

# Write the job script template. The heredoc delimiter is unquoted, so the
# variables are expanded now and each `\\` is written out as a single
# line-continuation backslash.
cat >temp.sh <<EOL
fastqc \\
  -q \\
  -c "${X_c}" \\
  -l "${X_l}" \\
  "${X_d}/read_1.fq" > /dev/null 2>&1

fastqc \\
  -q \\
  -c "${X_c}" \\
  -l "${X_l}" \\
  "${X_d}/read_2.fq" > /dev/null 2>&1
EOL

# Drop every line containing the text "None" from the template.
# NOTE(review): presumably "None" is how an unset optional parameter arrives,
# so this removes its option line - confirm against the tool wrapper.
grep -v None temp.sh > fastqc.sh

# run fastqc
sh fastqc.sh

# unzip outputs
unzip -q read_1_fastqc.zip
unzip -q read_2_fastqc.zip
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
48 ```
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
49
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
50 ```{r}
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
# Embed the generated FastQC job script verbatim in the report.
fastqc_sh = paste0(opt$X_d, '/fastqc.sh')
tags$code(
  tags$pre(
    # read the whole file as one string: the byte count comes from file.info()
    readChar(fastqc_sh, file.info(fastqc_sh)$size)
  )
)
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
54 ```
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
55
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
56 # Raw FastQC reports
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
57
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
58 ## Before trimming
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
59 ```{r eval=TRUE}
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
# Links to the files unpacked from read_1_fastqc.zip, rendered as a bullet list.
# The HTML report link is labelled with the X_n option; the two text files are
# labelled with their file names.
ori_html = tags$a(
  href = 'read_1_fastqc/fastqc_report.html',
  opt$X_n
)
ori_fastqc_data = tags$a(
  href = 'read_1_fastqc/fastqc_data.txt',
  'fastqc_data.txt'
)
ori_summary = tags$a(
  href = 'read_1_fastqc/summary.txt',
  'summary.txt'
)
tags$ul(tags$li(ori_html), tags$li(ori_fastqc_data), tags$li(ori_summary))
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
68 ```
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
69
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
70 ## After trimming
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
71 ```{r eval=TRUE}
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
# Links to the files unpacked from read_2_fastqc.zip, rendered as a bullet list.
# The variables keep the `ori_` names used by the previous chunk and overwrite
# its values.
ori_html = tags$a(
  href = 'read_2_fastqc/fastqc_report.html',
  opt$X_n
)
ori_fastqc_data = tags$a(
  href = 'read_2_fastqc/fastqc_data.txt',
  'fastqc_data.txt'
)
ori_summary = tags$a(
  href = 'read_2_fastqc/summary.txt',
  'summary.txt'
)
tags$ul(tags$li(ori_html), tags$li(ori_fastqc_data), tags$li(ori_summary))
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
80 ```
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
81
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
82
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
83 # FastQC Output Visualization
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
84
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
85 ## Overview
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
86
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
87 ```{r eval=TRUE}
3
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
# Side-by-side table of the per-module results of the two FastQC runs.

# First report: read the tab-separated summary, then swap its first two columns
# so the column that gets labelled MODULE below comes first.
read_1_summary = read.csv(
  paste0(opt$X_d, '/read_1_fastqc/summary.txt'),
  sep = '\t', header = FALSE, stringsAsFactors = FALSE
)
read_1_summary = read_1_summary[, 2:1]

# Second report: only its first column is needed.
# The rows are combined by position, so both files must list the modules in
# the same order.
read_2_summary = read.csv(
  paste0(opt$X_d, '/read_2_fastqc/summary.txt'),
  sep = '\t', header = FALSE, stringsAsFactors = FALSE
)
read_2_summary = read_2_summary[, 1]

combined_summary = data.frame(
  read_1_summary,
  read_2_summary,
  stringsAsFactors = FALSE
)
names(combined_summary) = c('MODULE', 'Pre-trimming', 'Post-trimming')

# Append a marker to failing and warning cells so they stand out in the table.
combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)'
combined_summary[combined_summary == 'WARN'] = 'WARN (!)'

knitr::kable(combined_summary)
f74fdae99053 planemo upload
mingchen0919
parents:
diff changeset
99 ```
3
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
100
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
101 ```{r 'function definition', echo=FALSE}
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
# Extract the table of a single module from a FastQC data file.
#
# The file is scanned for the first line matching `module_name`; the lines
# strictly between that line and the next line containing 'END_MODULE' are
# parsed as a tab-separated table.
#
# Arguments:
#   fastqc_data   file (anything readLines() accepts) holding the FastQC data.
#   module_name   regular expression identifying the module's header line.
#   header        forwarded to read.csv(): is the first module line a header?
#   comment.char  forwarded to read.csv().
#
# Returns a data.frame with the module's rows; an empty data.frame() when the
# module has no lines between its header and its END_MODULE marker.
# Stops with an error when the module or its END_MODULE marker is not found.
extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
  f = readLines(fastqc_data)

  start_matches = grep(module_name, f)
  if (length(start_matches) == 0) {
    stop("no line matching module '", module_name, "' was found", call. = FALSE)
  }
  # Several lines may match the pattern; the first one is the module start.
  start_line = start_matches[1]

  end_module_lines = grep('END_MODULE', f)
  end_line = end_module_lines[end_module_lines > start_line][1]
  if (is.na(end_line)) {
    stop("no END_MODULE line follows module '", module_name, "'", call. = FALSE)
  }

  # Guard against an empty module: (start_line + 1):(end_line - 1) would
  # otherwise count backwards and pick up the marker lines themselves.
  if (end_line - start_line < 2) {
    return(data.frame())
  }
  module_data = f[(start_line + 1):(end_line - 1)]

  # Parse straight from memory instead of round-tripping through a fixed
  # temporary file, which was not portable and could clash between jobs.
  read.csv(text = module_data, sep = '\t', header = header, comment.char = comment.char)
}
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
111 ```