comparison RScript.r @ 23:5f0597a3fd8b draft

Uploaded
author davidvanzessen
date Fri, 16 Jan 2015 07:37:41 -0500
parents 2555b94dbdb2
children 5454af6fece1
comparison
equal deleted inserted replaced
22:2555b94dbdb2 23:5f0597a3fd8b
164 PRODFJ = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.J.Gene")]) 164 PRODFJ = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.J.Gene")])
165 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length))) 165 Total = ddply(PRODFJ, .(Sample), function(x) data.frame(Total = sum(x$Length)))
166 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE) 166 PRODFJ = merge(PRODFJ, Total, by.x='Sample', by.y='Sample', all.x=TRUE)
167 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total)) 167 PRODFJ = ddply(PRODFJ, c("Sample", "Top.J.Gene"), summarise, relFreq= (Length*100 / Total))
168 168
169 # ---------------------- Setting up the gene names for the different T/B, human/mouse and locus ---------------------- 169 # ---------------------- Setting up the gene names for the different species/loci ----------------------
170 170
171 genes = read.table("genes.txt", sep="\t", header=TRUE, fill=T, comment.char="") 171 Vchain = ""
172 172 Dchain = ""
173 Vchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "V",c("IMGT.GENE.DB", "chr.order")] 173 Jchain = ""
174 colnames(Vchain) = c("v.name", "chr.orderV") 174
175 Dchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "D",c("IMGT.GENE.DB", "chr.order")] 175 if(species == "custom"){
176 colnames(Dchain) = c("v.name", "chr.orderD") 176 print("Custom genes: ")
177 Jchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "J",c("IMGT.GENE.DB", "chr.order")] 177 splt = unlist(strsplit(locus, ";"))
178 colnames(Jchain) = c("v.name", "chr.orderJ") 178 print(paste("V:", splt[1]))
179 179 print(paste("D:", splt[2]))
180 print(paste("J:", splt[3]))
181
182 Vchain = unlist(strsplit(splt[1], ","))
183 Vchain = data.frame(v.name = Vchain, chr.orderV = 1:length(Vchain))
184
185 Dchain = unlist(strsplit(splt[2], ","))
186 if(length(Dchain) > 0){
187 Dchain = data.frame(v.name = Dchain, chr.orderD = 1:length(Dchain))
188 } else {
189 Dchain = data.frame(v.name = character(0), chr.orderD = numeric(0))
190 }
191
192 Jchain = unlist(strsplit(splt[3], ","))
193 Jchain = data.frame(v.name = Jchain, chr.orderJ = 1:length(Jchain))
194
195 } else {
196 genes = read.table("genes.txt", sep="\t", header=TRUE, fill=T, comment.char="")
197
198 Vchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "V",c("IMGT.GENE.DB", "chr.order")]
199 colnames(Vchain) = c("v.name", "chr.orderV")
200 Dchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "D",c("IMGT.GENE.DB", "chr.order")]
201 colnames(Dchain) = c("v.name", "chr.orderD")
202 Jchain = genes[grepl(species, genes$Species) & genes$locus == locus & genes$region == "J",c("IMGT.GENE.DB", "chr.order")]
203 colnames(Jchain) = c("v.name", "chr.orderJ")
204 }
180 useD = TRUE 205 useD = TRUE
181 if(nrow(Dchain) == 0){ 206 if(nrow(Dchain) == 0){
182 useD = FALSE 207 useD = FALSE
183 cat("No D Genes in this species/locus") 208 cat("No D Genes in this species/locus")
184 } 209 }
210
211 print(paste("useD:", useD))
185 212
186 # ---------------------- merge with the frequency count ---------------------- 213 # ---------------------- merge with the frequency count ----------------------
187 214
188 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) 215 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE)
189 216