Mercurial > repos > artbio > gsc_correlation
comparison correlation_with_signature.R @ 0:6f3da989b59d draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_correlation commit 70c7abb20f76651e61325e3aa90809f6874f8b85
| author | artbio |
|---|---|
| date | Mon, 24 Jun 2019 07:18:35 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:6f3da989b59d |
|---|---|
| 1 # Performs multi-correlation analysis between the vectors of gene expressions | |
| 2 # in single cell RNAseq libraries and the vectors of signature scores in these | |
| 3 # same single cell RNAseq libraries. | |
| 4 | |
| 5 # Example of command | |
| 6 # Rscript correlations_with_signature.R --expression_file <expression_data.tsv> | |
| 7 # --signatures_file <signature_scores.tsv> | |
| 8 # --sep "\t" | |
| 9 # --colnames "T" | |
| 10 # --gene_corr <gene-gene corr file> | |
| 11 # --gene_corr_pval <gene-gene corr pvalues file> | |
| 12 # --sig_corr <genes correlation to signature file> | |
| 13 | |
| 14 # load packages that are provided in the conda env | |
| 15 options( show.error.messages=F, | |
| 16 error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | |
| 17 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
| 18 requiredPackages = c('optparse', 'Hmisc') | |
| 19 warnings() | |
| 20 | |
| 21 library(optparse) | |
| 22 library(Hmisc) | |
| 23 | |
| 24 # Arguments | |
| 25 option_list = list( | |
| 26 make_option( | |
| 27 "--sep", | |
| 28 default = '\t', | |
| 29 type = 'character', | |
| 30 help = "File separator, must be the same for all input files [default : '%default' ]" | |
| 31 ), | |
| 32 make_option( | |
| 33 "--colnames", | |
| 34 default = TRUE, | |
| 35 type = 'logical', | |
| 36 help = "Consider first lines as header (must stand for all input files) [default : '%default' ]" | |
| 37 ), | |
| 38 make_option( | |
| 39 "--expression_file", | |
| 40 default = NA, | |
| 41 type = 'character', | |
| 42 help = "Input file that contains log2(CPM +1) expression values" | |
| 43 ), | |
| 44 make_option( | |
| 45 "--signatures_file", | |
| 46 default = NA, | |
| 47 type = 'character', | |
| 48 help = "Input file that contains cell signature" | |
| 49 ), | |
| 50 make_option( | |
| 51 "--sig_corr", | |
| 52 default = "sig_corr.tsv", | |
| 53 type = 'character', | |
| 54 help = "signature correlations output [default : '%default' ]" | |
| 55 ), | |
| 56 make_option( | |
| 57 "--gene_corr", | |
| 58 default = "gene_corr.tsv", | |
| 59 type = 'character', | |
| 60 help = "genes-genes correlations output [default : '%default' ]" | |
| 61 ), | |
| 62 make_option( | |
| 63 "--gene_corr_pval", | |
| 64 default = "gene_corr_pval.tsv", | |
| 65 type = 'character', | |
| 66 help = "genes-genes correlations pvalues output [default : '%default' ]" | |
| 67 ) | |
| 68 ) | |
| 69 | |
| 70 opt = parse_args(OptionParser(option_list = option_list), | |
| 71 args = commandArgs(trailingOnly = TRUE)) | |
| 72 | |
| 73 if (opt$sep == "tab") {opt$sep = "\t"} | |
| 74 if (opt$sep == "comma") {opt$sep = ","} | |
| 75 | |
| 76 # Open files | |
| 77 data <- read.table( | |
| 78 opt$expression_file, | |
| 79 header = opt$colnames, | |
| 80 row.names = 1, | |
| 81 sep = opt$sep, | |
| 82 check.names = F | |
| 83 ) | |
| 84 signature <- read.delim( | |
| 85 opt$signatures_file, | |
| 86 header = T, | |
| 87 stringsAsFactors = F, | |
| 88 row.names = 1, | |
| 89 sep = opt$sep, | |
| 90 check.names = F | |
| 91 ) | |
| 92 | |
| 93 | |
| 94 # keep only signatures that are in the expression dataframe | |
| 95 signature <- subset(signature, rownames(signature) %in% colnames(data)) | |
| 96 | |
| 97 # Add signature score to expression matrix | |
| 98 data <- rbind(t(signature), data) | |
| 99 | |
| 100 # Gene correlation | |
| 101 gene_corr <- rcorr(t(data), type = "pearson") # transpose because we correlate genes, not cells | |
| 102 | |
| 103 # Gene correlation with signature score | |
| 104 gene_signature_corr <- cbind.data.frame(gene = colnames(gene_corr$r), | |
| 105 Pearson_correlation = gene_corr$r[, 1], | |
| 106 p_value = gene_corr$P[, 1]) | |
| 107 gene_signature_corr <- gene_signature_corr[ order(gene_signature_corr[,2], decreasing = T), ] | |
| 108 | |
| 109 | |
| 110 # Save files | |
| 111 write.table( | |
| 112 gene_signature_corr, | |
| 113 file = opt$sig_corr, | |
| 114 sep = "\t", | |
| 115 quote = F, | |
| 116 col.names = T, | |
| 117 row.names = F | |
| 118 ) | |
| 119 | |
| 120 r_genes <- data.frame(gene=rownames(gene_corr$r), gene_corr$r) # add rownames as a variable for output | |
| 121 p_genes <- data.frame(gene=rownames(gene_corr$P), gene_corr$P) # add rownames as a variable for output | |
| 122 write.table( | |
| 123 r_genes[-1,-2], | |
| 124 file = opt$gene_corr, | |
| 125 sep = "\t", | |
| 126 quote = F, | |
| 127 col.names = T, | |
| 128 row.names = F | |
| 129 ) | |
| 130 write.table( | |
| 131 p_genes[-1,-2], | |
| 132 file = opt$gene_corr_pval, | |
| 133 sep = "\t", | |
| 134 quote = F, | |
| 135 col.names = T, | |
| 136 row.names = F | |
| 137 ) |
