diff RScript.r @ 23:5f0597a3fd8b draft

Uploaded
author davidvanzessen
date Fri, 16 Jan 2015 07:37:41 -0500
parents 2555b94dbdb2
children 5454af6fece1
line wrap: on
line diff
--- a/RScript.r	Thu Jan 15 09:16:19 2015 -0500
+++ b/RScript.r	Fri Jan 16 07:37:41 2015 -0500
@@ -166,23 +166,50 @@
 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE)
 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total))
 
-# ---------------------- Setting up the gene names for the different T/B, human/mouse and locus ----------------------
+# ---------------------- Setting up the gene names for the different species/loci ----------------------
 
-genes = read.table("genes.txt", sep="\t", header=TRUE, fill=T, comment.char="")
+Vchain = ""  # placeholders; all three are reassigned to data frames by the if/else below
+Dchain = ""
+Jchain = ""
 
-Vchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "V",c("IMGT.GENE.DB", "chr.order")]
-colnames(Vchain) = c("v.name", "chr.orderV")
-Dchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "D",c("IMGT.GENE.DB", "chr.order")]
-colnames(Dchain) = c("v.name", "chr.orderD")
-Jchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "J",c("IMGT.GENE.DB", "chr.order")]
-colnames(Jchain) = c("v.name", "chr.orderJ")
+if(species == "custom"){
+	# Custom gene lists arrive in `locus` as "V1,V2;D1,D2;J1,J2"; the D part may be empty.
+	print("Custom genes: ")
+	splt = unlist(strsplit(locus, ";"))
+	print(paste("V:", splt[1]))
+	print(paste("D:", splt[2]))
+	print(paste("J:", splt[3]))
+	# A missing segment indexes as NA and strsplit() keeps NA, so drop NA/empty names
+	# to avoid a bogus one-row table (which would wrongly leave useD = TRUE).
+	Vchain = unlist(strsplit(splt[1], ","))
+	Vchain = Vchain[!is.na(Vchain) & Vchain != ""]
+	Dchain = unlist(strsplit(splt[2], ","))
+	Dchain = Dchain[!is.na(Dchain) & Dchain != ""]
+	Jchain = unlist(strsplit(splt[3], ","))
+	Jchain = Jchain[!is.na(Jchain) & Jchain != ""]
+	# seq_along() (unlike 1:length(x)) yields a zero-row table for an empty list.
+	Vchain = data.frame(v.name = Vchain, chr.orderV = seq_along(Vchain))
+	Dchain = data.frame(v.name = Dchain, chr.orderD = seq_along(Dchain))
+	Jchain = data.frame(v.name = Jchain, chr.orderJ = seq_along(Jchain))
 
+} else {
+	# Known species: select genes from genes.txt by species, locus and region (V/D/J).
+	genes = read.table("genes.txt", sep="\t", header=TRUE, fill=TRUE, comment.char="")
+	Vchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "V",c("IMGT.GENE.DB", "chr.order")]
+	colnames(Vchain) = c("v.name", "chr.orderV")
+	Dchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "D",c("IMGT.GENE.DB", "chr.order")]
+	colnames(Dchain) = c("v.name", "chr.orderD")
+	Jchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "J",c("IMGT.GENE.DB", "chr.order")]
+	colnames(Jchain) = c("v.name", "chr.orderJ")
+}
 useD = TRUE
 if(nrow(Dchain) == 0){
   useD = FALSE
   cat("No D Genes in this species/locus")
 }
 
+print(paste("useD:", useD))  # log the flag set above (FALSE when Dchain has no rows)
+
 # ---------------------- merge with the frequency count ----------------------
 
 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE)