Mercurial > repos > artbio > gsc_correlation
diff correlation_with_signature.R @ 0:6f3da989b59d draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_correlation commit 70c7abb20f76651e61325e3aa90809f6874f8b85
| author | artbio |
|---|---|
| date | Mon, 24 Jun 2019 07:18:35 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/correlation_with_signature.R Mon Jun 24 07:18:35 2019 -0400 @@ -0,0 +1,137 @@ +# Performs multi-correlation analysis between the vectors of gene expressions +# in single cell RNAseq libraries and the vectors of signature scores in these +# same single cell RNAseq libraries. + +# Example of command +# Rscript correlations_with_signature.R --expression_file <expression_data.tsv> +# --signatures_file <signature_scores.tsv> +# --sep "\t" +# --colnames "T" +# --gene_corr <gene-gene corr file> +# --gene_corr_pval <gene-gene corr pvalues file> +# --sig_corr <genes correlation to signature file> + +# load packages that are provided in the conda env +options( show.error.messages=F, + error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") +requiredPackages = c('optparse', 'Hmisc') +warnings() + +library(optparse) +library(Hmisc) + +# Arguments +option_list = list( + make_option( + "--sep", + default = '\t', + type = 'character', + help = "File separator, must be the same for all input files [default : '%default' ]" + ), + make_option( + "--colnames", + default = TRUE, + type = 'logical', + help = "Consider first lines as header (must stand for all input files) [default : '%default' ]" + ), + make_option( + "--expression_file", + default = NA, + type = 'character', + help = "Input file that contains log2(CPM +1) expression values" + ), + make_option( + "--signatures_file", + default = NA, + type = 'character', + help = "Input file that contains cell signature" + ), + make_option( + "--sig_corr", + default = "sig_corr.tsv", + type = 'character', + help = "signature correlations output [default : '%default' ]" + ), + make_option( + "--gene_corr", + default = "gene_corr.tsv", + type = 'character', + help = "genes-genes correlations output [default : '%default' ]" + ), + make_option( + "--gene_corr_pval", + default = "gene_corr_pval.tsv", + type = 'character', + help = "genes-genes correlations pvalues output [default : '%default' ]" + ) +) + +opt = parse_args(OptionParser(option_list = option_list), + args = commandArgs(trailingOnly = TRUE)) + +if (opt$sep == "tab") {opt$sep = "\t"} +if (opt$sep == "comma") {opt$sep = ","} + +# Open files +data <- read.table( + opt$expression_file, + header = opt$colnames, + row.names = 1, + sep = opt$sep, + check.names = F +) +signature <- read.delim( + opt$signatures_file, + header = T, + stringsAsFactors = F, + row.names = 1, + sep = opt$sep, + check.names = F +) + + +# keep only signatures that are in the expression dataframe +signature <- subset(signature, rownames(signature) %in% colnames(data)) + +# Add signature score to expression matrix +data <- rbind(t(signature), data) + +# Gene correlation +gene_corr <- rcorr(t(data), type = "pearson") # transpose because we correlate genes, not cells + +# Gene correlation with signature score +gene_signature_corr <- cbind.data.frame(gene = colnames(gene_corr$r), + Pearson_correlation = gene_corr$r[, 1], + p_value = gene_corr$P[, 1]) +gene_signature_corr <- gene_signature_corr[ order(gene_signature_corr[,2], decreasing = T), ] + + +# Save files +write.table( + gene_signature_corr, + file = opt$sig_corr, + sep = "\t", + quote = F, + col.names = T, + row.names = F +) + +r_genes <- data.frame(gene=rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output +p_genes <- data.frame(gene=rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output +write.table( + r_genes[-1,-2], + file = opt$gene_corr, + sep = "\t", + quote = F, + col.names = T, + row.names = F +) +write.table( + p_genes[-1,-2], + file = opt$gene_corr_pval, + sep = "\t", + quote = F, + col.names = T, + row.names = F +)
