# HG changeset patch # User davidvanzessen # Date 1461935325 14400 # Node ID 55f18bf19d72d9739583c59706aa40890c2499a6 # Parent bd6fb6c03948201a194ea80e529a85c7815fd162 Uploaded diff -r bd6fb6c03948 -r 55f18bf19d72 report_clonality/RScript.r --- a/report_clonality/RScript.r Thu Apr 28 08:11:04 2016 -0400 +++ b/report_clonality/RScript.r Fri Apr 29 09:08:45 2016 -0400 @@ -158,13 +158,10 @@ sample.count$perc_prod_un = round(sample.count$Productive_unique / sample.count$All * 100) sample.count = merge(sample.count , unprod.sample.count, by="Sample", all.x=T) -print(sample.count) sample.count$perc_unprod = round(sample.count$Unproductive / sample.count$All * 100) sample.count = merge(sample.count, unprod.unique.sample.count, by="Sample", all.x=T) sample.count$perc_unprod_un = round(sample.count$Unproductive_unique / sample.count$All * 100) -print(sample.count) - #then sample/replicate rep.count = merge(input.rep.count, prod.rep.count, by=c("Sample", "Replicate"), all.x=T) rep.count$perc_prod = round(rep.count$Productive / rep.count$All * 100) @@ -547,6 +544,7 @@ if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available { print("Report Clonality - Clonality") + write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) if(clonality_method == "boyd"){ samples = split(clonalityFrame, clonalityFrame$Sample, drop=T) @@ -573,9 +571,23 @@ write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T) } } else { - write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T) - clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")]) + + #write files for every coincidence group of >1 + samples = unique(clonalFreq$Sample) + for(sample in samples){ + clonalFreqSample = clonalFreq[clonalFreq$Sample == sample,] + if(max(clonalFreqSample$Type) > 1){ + for(i in 2:max(clonalFreqSample$Type)){ + clonalFreqSample = clonalFreqSample[clonalFreqSample$Type == i,] + print(clonalFreqSample[clonalFreqSample$Type == i,]) + PRODF.sub = PRODF[PRODF$clonaltype %in% clonalFreqSample$clonaltype,] + PRODF.sub = PRODF.sub[order(PRODF.sub$clonaltype),] + write.table(PRODF.sub, file=paste("coincidences_", sample, "_", i, ".txt", sep=""), sep="\t",quote=F,row.names=F,col.names=T) + } + } + } + clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")]) clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")]) @@ -657,9 +669,8 @@ } - print(names(PRODF)) #ensure certain columns are in the data (files generated with older versions of IMGT Loader) - col.checks = c("N3.REGION.nt.nb", "N4.REGION.nt.nb") + col.checks = c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb") for(col.check in col.checks){ if(!(col.check %in% names(PRODF))){ print(paste(col.check, "not found adding new column")) @@ -676,8 +687,6 @@ } } - print(names(PRODF)) - num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) } newData = data.frame(data.table(PRODF)[,list(unique=.N, diff -r bd6fb6c03948 -r 55f18bf19d72 report_clonality/r_wrapper.sh --- a/report_clonality/r_wrapper.sh Thu Apr 28 08:11:04 2016 -0400 +++ b/report_clonality/r_wrapper.sh Fri Apr 29 09:08:45 2016 -0400 @@ -164,7 +164,6 @@ echo "