Mercurial > repos > davidvanzessen > report_clonality_igg
comparison RScript.r @ 27:f919965e8816 draft
Uploaded
author | davidvanzessen |
---|---|
date | Fri, 06 Mar 2015 09:59:53 -0500 |
parents | 01f05258f672 |
children |
comparison
equal
deleted
inserted
replaced
26:01f05258f672 | 27:f919965e8816 |
---|---|
20 | 20 |
21 if (!("reshape2" %in% rownames(installed.packages()))) { | 21 if (!("reshape2" %in% rownames(installed.packages()))) { |
22 install.packages("reshape2", repos="http://cran.xl-mirror.nl/") | 22 install.packages("reshape2", repos="http://cran.xl-mirror.nl/") |
23 } | 23 } |
24 library(reshape2) | 24 library(reshape2) |
25 | |
26 if (!("lymphclon" %in% rownames(installed.packages()))) { | |
27 install.packages("lymphclon", repos="http://cran.xl-mirror.nl/") | |
28 } | |
29 library(lymphclon) | |
25 | 30 |
26 # ---------------------- parameters ---------------------- | 31 # ---------------------- parameters ---------------------- |
27 | 32 |
28 args <- commandArgs(trailingOnly = TRUE) | 33 args <- commandArgs(trailingOnly = TRUE) |
29 | 34 |
33 clonaltype = args[4] #clonaltype definition, or 'none' for no unique filtering | 38 clonaltype = args[4] #clonaltype definition, or 'none' for no unique filtering |
34 ct = unlist(strsplit(clonaltype, ",")) | 39 ct = unlist(strsplit(clonaltype, ",")) |
35 species = args[5] #human or mouse | 40 species = args[5] #human or mouse |
36 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD | 41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD |
37 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) | 42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) |
43 clonality_method = args[8] | |
38 | 44 |
39 # ---------------------- Data preperation ---------------------- | 45 # ---------------------- Data preperation ---------------------- |
40 | 46 |
41 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") | 47 inputdata = read.table(infile, sep="\t", header=TRUE, fill=T, comment.char="") |
42 | 48 |
215 useD = TRUE | 221 useD = TRUE |
216 if(nrow(Dchain) == 0){ | 222 if(nrow(Dchain) == 0){ |
217 useD = FALSE | 223 useD = FALSE |
218 cat("No D Genes in this species/locus") | 224 cat("No D Genes in this species/locus") |
219 } | 225 } |
220 | |
221 print(paste("useD:", useD)) | 226 print(paste("useD:", useD)) |
222 | 227 |
223 # ---------------------- merge with the frequency count ---------------------- | 228 # ---------------------- merge with the frequency count ---------------------- |
224 | 229 |
225 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) | 230 PRODFV = merge(PRODFV, Vchain, by.x='Top.V.Gene', by.y='v.name', all.x=TRUE) |
451 | 456 |
452 # ---------------------- calculating the clonality score ---------------------- | 457 # ---------------------- calculating the clonality score ---------------------- |
453 | 458 |
454 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available | 459 if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available |
455 { | 460 { |
456 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) | 461 if(clonality_method == "boyd"){ |
457 | 462 samples = split(clonalityFrame, clonalityFrame$Sample, drop=T) |
458 ClonalitySampleReplicatePrint <- function(dat){ | 463 |
459 write.table(dat, paste("clonality_", unique(inputdata$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) | 464 for (sample in samples){ |
465 res = data.frame(paste=character(0)) | |
466 sample_id = unique(sample$Sample)[[1]] | |
467 for(replicate in unique(sample$Replicate)){ | |
468 tmp = sample[sample$Replicate == replicate,] | |
469 clone_table = data.frame(table(tmp$clonaltype)) | |
470 clone_col_name = paste("V", replicate, sep="") | |
471 colnames(clone_table) = c("paste", clone_col_name) | |
472 res = merge(res, clone_table, by="paste", all=T) | |
473 } | |
474 | |
475 res[is.na(res)] = 0 | |
476 infer.result = infer.clonality(as.matrix(res[,2:ncol(res)])) | |
477 | |
478 write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F) | |
479 | |
480 res$type = rowSums(res[,2:ncol(res)]) | |
481 | |
482 coincidence.table = data.frame(table(res$type)) | |
483 colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq") | |
484 write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) | |
485 } | |
486 } else { | |
487 write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) | |
488 | |
489 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")]) | |
490 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) | |
491 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count | |
492 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) | |
493 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample") | |
494 | |
495 ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15') | |
496 tcct = textConnection(ct) | |
497 CT = read.table(tcct, sep="\t", header=TRUE) | |
498 close(tcct) | |
499 clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T) | |
500 clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight | |
501 | |
502 ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")]) | |
503 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")]) | |
504 clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads) | |
505 ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads | |
506 | |
507 ReplicatePrint <- function(dat){ | |
508 write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | |
509 } | |
510 | |
511 ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) | |
512 lapply(ReplicateSplit, FUN=ReplicatePrint) | |
513 | |
514 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")]) | |
515 clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T) | |
516 | |
517 ReplicateSumPrint <- function(dat){ | |
518 write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | |
519 } | |
520 | |
521 ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) | |
522 lapply(ReplicateSumSplit, FUN=ReplicateSumPrint) | |
523 | |
524 clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")]) | |
525 clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T) | |
526 clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow | |
527 clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2) | |
528 clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1) | |
529 | |
530 ClonalityScorePrint <- function(dat){ | |
531 write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | |
532 } | |
533 | |
534 clonalityScore = clonalFreqCount[c("Sample", "Result")] | |
535 clonalityScore = unique(clonalityScore) | |
536 | |
537 clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"]) | |
538 lapply(clonalityScoreSplit, FUN=ClonalityScorePrint) | |
539 | |
540 clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")] | |
541 | |
542 | |
543 | |
544 ClonalityOverviewPrint <- function(dat){ | |
545 write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | |
546 } | |
547 | |
548 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) | |
549 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) | |
460 } | 550 } |
461 | |
462 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,c("Sample", "Replicate")]) | |
463 #lapply(clonalityFrameSplit, FUN=ClonalitySampleReplicatePrint) | |
464 | |
465 ClonalitySamplePrint <- function(dat){ | |
466 write.table(dat, paste("clonality_", unique(inputdata$Sample) , ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) | |
467 } | |
468 | |
469 clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"]) | |
470 #lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint) | |
471 | |
472 clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")]) | |
473 clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) | |
474 clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count | |
475 clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) | |
476 clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample") | |
477 | |
478 ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15') | |
479 tcct = textConnection(ct) | |
480 CT = read.table(tcct, sep="\t", header=TRUE) | |
481 close(tcct) | |
482 clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T) | |
483 clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight | |
484 | |
485 ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonality_clonaltype")]) | |
486 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")]) | |
487 clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads) | |
488 ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads | |
489 | |
490 ReplicatePrint <- function(dat){ | |
491 write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | |
492 } | |
493 | |
494 ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) | |
495 lapply(ReplicateSplit, FUN=ReplicatePrint) | |
496 | |
497 ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")]) | |
498 clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T) | |
499 | |
500 | |
501 ReplicateSumPrint <- function(dat){ | |
502 write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | |
503 } | |
504 | |
505 ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"]) | |
506 lapply(ReplicateSumSplit, FUN=ReplicateSumPrint) | |
507 | |
508 clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")]) | |
509 clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T) | |
510 clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow | |
511 clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2) | |
512 clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1) | |
513 | |
514 ClonalityScorePrint <- function(dat){ | |
515 write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | |
516 } | |
517 | |
518 clonalityScore = clonalFreqCount[c("Sample", "Result")] | |
519 clonalityScore = unique(clonalityScore) | |
520 | |
521 clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"]) | |
522 lapply(clonalityScoreSplit, FUN=ClonalityScorePrint) | |
523 | |
524 clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")] | |
525 | |
526 | |
527 | |
528 ClonalityOverviewPrint <- function(dat){ | |
529 write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F) | |
530 } | |
531 | |
532 clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample) | |
533 lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint) | |
534 } | 551 } |
535 | 552 |
536 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") | 553 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb") |
537 if(all(imgtcolumns %in% colnames(inputdata))) | 554 if(all(imgtcolumns %in% colnames(inputdata))) |
538 { | 555 { |