annotate 09_sequence_duplication_levels.Rmd @ 10:5286f3b1c9c2 draft default tip

planemo upload commit 363ef0a73d874c67326a67f51a78328906951248-dirty
author mingchen0919
date Fri, 30 Nov 2018 22:15:50 -0500
parents 94762f1cb779
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
1 ---
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
2 output: html_document
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
3 ---
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
4
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
5 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
8
94762f1cb779 planemo upload commit 76f9ca75d5b1c0c0fad6c10876d9dfeba7d5ecff-dirty
mingchen0919
parents: 3
diff changeset
6 knitr::opts_knit$set(progress = FALSE)  # package-level option: turn off knitr's progress output while rendering
94762f1cb779 planemo upload commit 76f9ca75d5b1c0c0fad6c10876d9dfeba7d5ecff-dirty
mingchen0919
parents: 3
diff changeset
7 knitr::opts_chunk$set(error = TRUE, echo = FALSE)  # chunk defaults: keep rendering when a chunk errors, and hide chunk source code
1
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
8 ```
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
9
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
10
3
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
11 ### Sequence Duplication Levels
1
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
12
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
13 ```{r 'Sequence Duplication Levels', fig.width=10}
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
14 ## reads 1: load the 'Sequence Duplication Levels' module from the read_1 FastQC data file and reshape it to long format
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
15 sdl_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
16 names(sdl_1) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
17 sdl_1$id = seq_len(nrow(sdl_1))  # numeric row index used as the x position; seq_len() yields an empty vector for a 0-row table, where 1:length() would give c(1, 0) and fail
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
18
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
19 melt_sdl_1 = melt(sdl_1, id=c('Duplication_Level', 'id'))  # keep level label and index as id columns; presumably reshape2::melt, stacking the two percentage columns into variable/value - confirm
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
20 melt_sdl_1$trim = 'before'  # NOTE(review): read_1 is tagged as the 'before' trimming sample - confirm against the caller
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
21
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
22
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
23 ## reads 2: load the 'Sequence Duplication Levels' module from the read_2 FastQC data file and reshape it to long format
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
24 sdl_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
25 names(sdl_2) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
26 sdl_2$id = seq_len(nrow(sdl_2))  # numeric row index used as the x position; seq_len() yields an empty vector for a 0-row table, where 1:length() would give c(1, 0) and fail
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
27
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
28 melt_sdl_2 = melt(sdl_2, id=c('Duplication_Level', 'id'))  # keep level label and index as id columns; presumably reshape2::melt, stacking the two percentage columns into variable/value - confirm
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
29 melt_sdl_2$trim = 'after'  # NOTE(review): read_2 is tagged as the 'after' trimming sample - confirm against the caller
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
30
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
31 comb_sdl = rbind(melt_sdl_1, melt_sdl_2)  # stack the two long tables into a single data frame for plotting
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
32 comb_sdl$trim = factor(comb_sdl$trim, levels = c('before', 'after'))  # explicit level order so 'before' sorts ahead of 'after'
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
33
3
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
34 p = ggplot(data = comb_sdl) +  # line plot of the combined duplication-level table, built layer by layer
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
35 geom_line(mapping = aes(x = id, y = value, color = variable)) +  # x is the numeric row index; one coloured line per melted measure
1
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
36 scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) +  # relabel index ticks with level names from the read 2 table; NOTE(review): assumes both tables list the same levels in the same order
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
37 facet_grid(. ~ trim) +  # one side-by-side panel per value of trim
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
38 xlab('Sequence Duplication Level') +
645291efd2e7 working version
mingchen0919
parents: 0
diff changeset
39 ylab('') +  # blank y-axis title
3
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
40 scale_color_discrete(name = '') +  # blank legend title
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
41 theme(axis.text.x = element_text(size = 5),  # small tick labels on the x axis
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
42 panel.background = element_rect(fill = NA),  # no fill behind the panels
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
43 axis.line = element_line())  # draw axis lines
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
44 p  # bare object at chunk top level, so the plot is printed into the rendered document
b9c9d1dacda6 reformat code
mingchen0919
parents: 1
diff changeset
45 ```