# HG changeset patch # User artbio # Date 1562683929 14400 # Node ID 11ff7bd0430883dfa792aa5da758914a7fddcded # Parent bf99be04d098595fada8b8a26614116c268ca48a planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_signature_score commit 987e0ceb55e8de1d2f09d0f2ae48ff7cd3e82051 diff -r bf99be04d098 -r 11ff7bd04308 signature_score.R --- a/signature_score.R Mon Jun 24 07:22:06 2019 -0400 +++ b/signature_score.R Tue Jul 09 10:52:09 2019 -0400 @@ -65,6 +65,18 @@ help = "statistics path [default : '%default' ]" ), make_option( + "--correlations", + default = "./correlations.tab", + type = 'character', + help = "Correlations between signature genes [default : '%default' ]" + ), + make_option( + "--covariances", + default = "./statistics.tab", + type = 'character', + help = "Covariances between signature genes [default : '%default' ]" + ), + make_option( "--pdf", default = "~/output.pdf", type = 'character', @@ -100,6 +112,15 @@ # Retrieve target genes in counts data signature.counts <- subset(data.counts, logical_genes) +# compute covariance +signature.covariances <- as.data.frame(cov(t(signature.counts))) +signature.covariances <- cbind(gene=rownames(signature.covariances), signature.covariances) +write.table(signature.covariances, file=opt$covariances, quote=F, row.names=F, sep="\t") + +# compute signature.correlations +signature.correlations <- as.data.frame(cov(t(signature.counts))) +signature.correlations <- cbind(gene=rownames(signature.correlations), signature.correlations) +write.table(signature.correlations, file=opt$correlations, quote=F, row.names=F, sep="\t") ## Descriptive Statistics Function descriptive_stats = function(InputData) { @@ -171,43 +192,16 @@ stringsAsFactors = F) pdf(file = opt$pdf) - -ggplot(score, aes(x = order, y = score)) + - geom_line() + - geom_segment(x = 0, xend = max(score$order[score$signature == "LOW"]), y = mean(score$score), yend = mean(score$score)) + - geom_area(aes(fill = signature), alpha = .7) + - scale_fill_manual(values=c("#ff0000", "#08661e")) + - geom_text(aes(x = 1, y = mean(score)), label = "Mean", vjust = -0.3, colour = "black") + - labs(title = "Ordered cell signature scores", x = "Cell index", y = "Score") +myplot <- ggplot(signature_output, aes(x=rate, y=score)) + + geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F, cex=0.5) + + geom_abline(slope=0, intercept=mean(score$score), lwd=.5, color="red") + + scale_fill_manual(values=c("#ff0000", "#08661e")) + + geom_jitter(size=0.2) + labs(y = "Score", x = "Rate") + + annotate("text", x = 0.55, y = mean(score$score), cex = 3, vjust=1.5, + color="black", label = mean(score$score), parse = TRUE) + + labs(title = "Violin plots of Cell signature scores") -density_score <- density(score$score) -ggplot(data.frame(density_score[1:2]), aes(x, y, fill = ifelse(x < mean(score$score), "LOW", "HIGH"))) + - geom_line() + - geom_vline(xintercept = mean(score$score)) + - geom_text(x = mean(score$score), y = max(density_score$y), label = "Mean", hjust = -0.3, colour = "black") + - geom_area(alpha = .7) + - scale_fill_manual(values=c("#ff0000", "#08661e")) + - ylim(0, max(density_score$y)) + - labs( - title = "Distribution of Cell signature scores", - x = paste("N =", density_score$n, "Bandwidth =", density_score$bw), - y = "Density", - fill = "Signature" - ) - -# Check score independant of low expression -p_gene <- ggplot(signature_output, aes(rate, nGenes)) + - geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F) + - scale_fill_manual(values=c("#ff0000", "#08661e")) + - geom_jitter() + labs(y = "Number of detected genes", x = "Signature") - -p_counts <- ggplot(signature_output, aes(rate, total_counts)) + - geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F) + - scale_fill_manual(values=c("#ff0000", "#08661e")) + - geom_jitter() + labs(y = "Total counts", x = "Signature") - -grid.arrange(p_gene, p_counts, ncol = 2, top = "Influence of library sequencing depth on cell signature scores") - +print(myplot) dev.off() # Save file diff -r bf99be04d098 -r 11ff7bd04308 signature_score.xml --- a/signature_score.xml Mon Jun 24 07:22:06 2019 -0400 +++ b/signature_score.xml Tue Jul 09 10:52:09 2019 -0400 @@ -1,4 +1,4 @@ - + in single cell RNAseq r-optparse @@ -23,9 +23,11 @@ --output '$output' --stats '$stats' --pdf '$pdf' + --correlations '$correlations' + --covariances '$covariances' ]]> - + @@ -36,9 +38,11 @@ help="signature gene that are not expressed in at least this percentage of cells will not be kept to compute the effective signature" /> - + - + + + @@ -48,6 +52,8 @@ + + diff -r bf99be04d098 -r 11ff7bd04308 test-data/correlations.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/correlations.tsv Tue Jul 09 10:52:09 2019 -0400 @@ -0,0 +1,6 @@ +gene ZNF454 ACAD9 LAIR1 GAPDH CHTOP +ZNF454 11.9837926366092 1.26124926841842 -4.79209750033634 1.28119046792732 4.7040467133562 +ACAD9 1.26124926841842 36.4037213995154 0.987902621359844 1.57734537259074 3.12155500211722 +LAIR1 -4.79209750033634 0.987902621359844 58.8639861631431 -0.564659931985148 1.20262555258428 +GAPDH 1.28119046792732 1.57734537259074 -0.564659931985148 24.8789307434615 0.808639324069219 +CHTOP 4.7040467133562 3.12155500211722 1.20262555258428 0.808639324069219 44.8667386237237 diff -r bf99be04d098 -r 11ff7bd04308 test-data/covariances.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/covariances.tsv Tue Jul 09 10:52:09 2019 -0400 @@ -0,0 +1,6 @@ +gene ZNF454 ACAD9 LAIR1 GAPDH CHTOP +ZNF454 11.9837926366092 1.26124926841842 -4.79209750033634 1.28119046792732 4.7040467133562 +ACAD9 1.26124926841842 36.4037213995154 0.987902621359844 1.57734537259074 3.12155500211722 +LAIR1 -4.79209750033634 0.987902621359844 58.8639861631431 -0.564659931985148 1.20262555258428 +GAPDH 1.28119046792732 1.57734537259074 -0.564659931985148 24.8789307434615 0.808639324069219 +CHTOP 4.7040467133562 3.12155500211722 1.20262555258428 0.808639324069219 44.8667386237237 diff -r bf99be04d098 -r 11ff7bd04308 test-data/signature.pdf Binary file test-data/signature.pdf has changed