comparison RScript.r @ 27:f919965e8816 draft

Uploaded
author davidvanzessen
date Fri, 06 Mar 2015 09:59:53 -0500
parents 01f05258f672
children
comparison
equal deleted inserted replaced
26:01f05258f672 27:f919965e8816
20 20
21 if (!("reshape2" %in% rownames(installed.packages()))) { 21 if (!("reshape2" %in% rownames(installed.packages()))) {
22 install.packages("reshape2", repos="http://cran.xl-mirror.nl/") 22 install.packages("reshape2", repos="http://cran.xl-mirror.nl/")
23 } 23 }
24 library(reshape2) 24 library(reshape2)
25
26 if (!("lymphclon" %in% rownames(installed.packages()))) {
27 install.packages("lymphclon", repos="http://cran.xl-mirror.nl/")
28 }
29 library(lymphclon)
25 30
26 # ---------------------- parameters ---------------------- 31 # ---------------------- parameters ----------------------
27 32
28 args <- commandArgs(trailingOnly = TRUE) 33 args <- commandArgs(trailingOnly = TRUE)
29 34
33 clonaltype = args[4] #clonaltype definition, or 'none' for no unique filtering 38 clonaltype = args[4] #clonaltype definition, or 'none' for no unique filtering
34 ct = unlist(strsplit(clonaltype, ",")) 39 ct = unlist(strsplit(clonaltype, ","))
35 species = args[5] #human or mouse 40 species = args[5] #human or mouse
36 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD 41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD
37 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) 42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no)
43 clonality_method = args[8]
38 44
39 # ---------------------- Data preperation ---------------------- 45 # ---------------------- Data preperation ----------------------
40 46
41 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") 47 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="")
42 48
215 useD = TRUE 221 useD = TRUE
216 if(nrow(Dchain) == 0){ 222 if(nrow(Dchain) == 0){
217 useD = FALSE 223 useD = FALSE
218 cat("No D Genes in this species/locus") 224 cat("No D Genes in this species/locus")
219 } 225 }
220
221 print(paste("useD:", useD)) 226 print(paste("useD:", useD))
222 227
223 # ---------------------- merge with the frequency count ---------------------- 228 # ---------------------- merge with the frequency count ----------------------
224 229
225 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) 230 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE)
451 456
452 # ---------------------- calculating the clonality score ---------------------- 457 # ---------------------- calculating the clonality score ----------------------
453 458
454 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available 459 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available
455 { 460 {
456 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) 461 if(clonality_method == "boyd"){
457 462 samples = split(clonalityFrame, clonalityFrame$Sample, drop=T)
458 ClonalitySampleReplicatePrint <- function(dat){ 463
459 write.table(dat, paste("clonality_", unique(inputdata$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) 464 for (sample in samples){
465 res = data.frame(paste=character(0))
466 sample_id = unique(sample$Sample)[[1]]
467 for(replicate in unique(sample$Replicate)){
468 tmp = sample[sample$Replicate == replicate,]
469 clone_table = data.frame(table(tmp$clonaltype))
470 clone_col_name = paste("V", replicate, sep="")
471 colnames(clone_table) = c("paste", clone_col_name)
472 res = merge(res, clone_table, by="paste", all=T)
473 }
474
475 res[is.na(res)] = 0
476 infer.result = infer.clonality(as.matrix(res[,2:ncol(res)]))
477
478 write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F)
479
480 res$type = rowSums(res[,2:ncol(res)])
481
482 coincidence.table = data.frame(table(res$type))
483 colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq")
484 write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
485 }
486 } else {
487 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T)
488
489 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])
490 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])
491 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count
492 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])
493 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample")
494
495 ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15')
496 tcct = textConnection(ct)
497 CT = read.table(tcct, sep="\t", header=TRUE)
498 close(tcct)
499 clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T)
500 clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight
501
502 ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")])
503 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")])
504 clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads)
505 ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads
506
507 ReplicatePrint <- function(dat){
508 write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
509 }
510
511 ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
512 lapply(ReplicateSplit, FUN=ReplicatePrint)
513
514 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")])
515 clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T)
516
517 ReplicateSumPrint <- function(dat){
518 write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
519 }
520
521 ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
522 lapply(ReplicateSumSplit, FUN=ReplicateSumPrint)
523
524 clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")])
525 clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T)
526 clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow
527 clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2)
528 clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1)
529
530 ClonalityScorePrint <- function(dat){
531 write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
532 }
533
534 clonalityScore = clonalFreqCount[c("Sample", "Result")]
535 clonalityScore = unique(clonalityScore)
536
537 clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"])
538 lapply(clonalityScoreSplit, FUN=ClonalityScorePrint)
539
540 clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")]
541
542
543
544 ClonalityOverviewPrint <- function(dat){
545 write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
546 }
547
548 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)
549 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)
460 } 550 }
461
462 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,c("Sample", "Replicate")])
463 #lapply(clonalityFrameSplit, FUN=ClonalitySampleReplicatePrint)
464
465 ClonalitySamplePrint <- function(dat){
466 write.table(dat, paste("clonality_", unique(inputdata$Sample) , ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
467 }
468
469 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"])
470 #lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint)
471
472 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])
473 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])
474 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count
475 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])
476 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample")
477
478 ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15')
479 tcct = textConnection(ct)
480 CT = read.table(tcct, sep="\t", header=TRUE)
481 close(tcct)
482 clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T)
483 clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight
484
485 ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonality_clonaltype")])
486 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")])
487 clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads)
488 ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads
489
490 ReplicatePrint <- function(dat){
491 write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
492 }
493
494 ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
495 lapply(ReplicateSplit, FUN=ReplicatePrint)
496
497 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")])
498 clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T)
499
500
501 ReplicateSumPrint <- function(dat){
502 write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
503 }
504
505 ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
506 lapply(ReplicateSumSplit, FUN=ReplicateSumPrint)
507
508 clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")])
509 clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T)
510 clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow
511 clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2)
512 clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1)
513
514 ClonalityScorePrint <- function(dat){
515 write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
516 }
517
518 clonalityScore = clonalFreqCount[c("Sample", "Result")]
519 clonalityScore = unique(clonalityScore)
520
521 clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"])
522 lapply(clonalityScoreSplit, FUN=ClonalityScorePrint)
523
524 clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")]
525
526
527
528 ClonalityOverviewPrint <- function(dat){
529 write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
530 }
531
532 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)
533 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)
534 } 551 }
535 552
536 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") 553 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb")
537 if(all(imgtcolumns %in% colnames(inputdata))) 554 if(all(imgtcolumns %in% colnames(inputdata)))
538 { 555 {