Mercurial > repos > artbio > gsc_signature_score
comparison signature_score.R @ 1:11ff7bd04308 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_signature_score commit 987e0ceb55e8de1d2f09d0f2ae48ff7cd3e82051
author | artbio |
---|---|
date | Tue, 09 Jul 2019 10:52:09 -0400 |
parents | bf99be04d098 |
children |
comparison
equal
deleted
inserted
replaced
0:bf99be04d098 | 1:11ff7bd04308 |
---|---|
61 make_option( | 61 make_option( |
62 "--stats", | 62 "--stats", |
63 default = "./statistics.tab", | 63 default = "./statistics.tab", |
64 type = 'character', | 64 type = 'character', |
65 help = "statistics path [default : '%default' ]" | 65 help = "statistics path [default : '%default' ]" |
66 ), | |
67 make_option( | |
68 "--correlations", | |
69 default = "./correlations.tab", | |
70 type = 'character', | |
71 help = "Correlations between signature genes [default : '%default' ]" | |
72 ), | |
73 make_option( | |
74 "--covariances", | |
75 default = "./statistics.tab", | |
76 type = 'character', | |
77 help = "Covariances between signature genes [default : '%default' ]" | |
66 ), | 78 ), |
67 make_option( | 79 make_option( |
68 "--pdf", | 80 "--pdf", |
69 default = "~/output.pdf", | 81 default = "~/output.pdf", |
70 type = 'character', | 82 type = 'character', |
98 logical_genes <- rownames(data.counts) %in% genes | 110 logical_genes <- rownames(data.counts) %in% genes |
99 | 111 |
100 # Retrieve target genes in counts data | 112 # Retrieve target genes in counts data |
101 signature.counts <- subset(data.counts, logical_genes) | 113 signature.counts <- subset(data.counts, logical_genes) |
102 | 114 |
115 # compute covariance | |
116 signature.covariances <- as.data.frame(cov(t(signature.counts))) | |
117 signature.covariances <- cbind(gene=rownames(signature.covariances), signature.covariances) | |
118 write.table(signature.covariances, file=opt$covariances, quote=F, row.names=F, sep="\t") | |
119 | |
120 # compute signature.correlations | |
121 signature.correlations <- as.data.frame(cov(t(signature.counts))) | |
122 signature.correlations <- cbind(gene=rownames(signature.correlations), signature.correlations) | |
123 write.table(signature.correlations, file=opt$correlations, quote=F, row.names=F, sep="\t") | |
103 | 124 |
104 ## Descriptive Statistics Function | 125 ## Descriptive Statistics Function |
105 descriptive_stats = function(InputData) { | 126 descriptive_stats = function(InputData) { |
106 SummaryData = data.frame( | 127 SummaryData = data.frame( |
107 mean = rowMeans(InputData), | 128 mean = rowMeans(InputData), |
169 order = rank(score, ties.method = "first"), | 190 order = rank(score, ties.method = "first"), |
170 signature = signature_output$rate, | 191 signature = signature_output$rate, |
171 stringsAsFactors = F) | 192 stringsAsFactors = F) |
172 | 193 |
173 pdf(file = opt$pdf) | 194 pdf(file = opt$pdf) |
174 | 195 myplot <- ggplot(signature_output, aes(x=rate, y=score)) + |
175 ggplot(score, aes(x = order, y = score)) + | 196 geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F, cex=0.5) + |
176 geom_line() + | 197 geom_abline(slope=0, intercept=mean(score$score), lwd=.5, color="red") + |
177 geom_segment(x = 0, xend = max(score$order[score$signature == "LOW"]), y = mean(score$score), yend = mean(score$score)) + | 198 scale_fill_manual(values=c("#ff0000", "#08661e")) + |
178 geom_area(aes(fill = signature), alpha = .7) + | 199 geom_jitter(size=0.2) + labs(y = "Score", x = "Rate") + |
179 scale_fill_manual(values=c("#ff0000", "#08661e")) + | 200 annotate("text", x = 0.55, y = mean(score$score), cex = 3, vjust=1.5, |
180 geom_text(aes(x = 1, y = mean(score)), label = "Mean", vjust = -0.3, colour = "black") + | 201 color="black", label = mean(score$score), parse = TRUE) + |
181 labs(title = "Ordered cell signature scores", x = "Cell index", y = "Score") | 202 labs(title = "Violin plots of Cell signature scores") |
182 | 203 |
183 density_score <- density(score$score) | 204 print(myplot) |
184 ggplot(data.frame(density_score[1:2]), aes(x, y, fill = ifelse(x < mean(score$score), "LOW", "HIGH"))) + | |
185 geom_line() + | |
186 geom_vline(xintercept = mean(score$score)) + | |
187 geom_text(x = mean(score$score), y = max(density_score$y), label = "Mean", hjust = -0.3, colour = "black") + | |
188 geom_area(alpha = .7) + | |
189 scale_fill_manual(values=c("#ff0000", "#08661e")) + | |
190 ylim(0, max(density_score$y)) + | |
191 labs( | |
192 title = "Distribution of Cell signature scores", | |
193 x = paste("N =", density_score$n, "Bandwidth =", density_score$bw), | |
194 y = "Density", | |
195 fill = "Signature" | |
196 ) | |
197 | |
198 # Check score independant of low expression | |
199 p_gene <- ggplot(signature_output, aes(rate, nGenes)) + | |
200 geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F) + | |
201 scale_fill_manual(values=c("#ff0000", "#08661e")) + | |
202 geom_jitter() + labs(y = "Number of detected genes", x = "Signature") | |
203 | |
204 p_counts <- ggplot(signature_output, aes(rate, total_counts)) + | |
205 geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F) + | |
206 scale_fill_manual(values=c("#ff0000", "#08661e")) + | |
207 geom_jitter() + labs(y = "Total counts", x = "Signature") | |
208 | |
209 grid.arrange(p_gene, p_counts, ncol = 2, top = "Influence of library sequencing depth on cell signature scores") | |
210 | |
211 dev.off() | 205 dev.off() |
212 | 206 |
213 # Save file | 207 # Save file |
214 write.table( | 208 write.table( |
215 signature_output, | 209 signature_output, |