# HG changeset patch
# User davidvanzessen
# Date 1425653993 18000
# Node ID f919965e8816b5fbcbb4abc3f1da540ccff6da64
# Parent 01f05258f67258bf8547bb2c0579c35b8c286358
Uploaded
diff -r 01f05258f672 -r f919965e8816 RScript.r
--- a/RScript.r Mon Feb 09 08:53:30 2015 -0500
+++ b/RScript.r Fri Mar 06 09:59:53 2015 -0500
@@ -23,6 +23,11 @@
}
library(reshape2)
+if (!("lymphclon" %in% rownames(installed.packages()))) {
+ install.packages("lymphclon", repos="http://cran.xl-mirror.nl/")
+}
+library(lymphclon)
+
# ---------------------- parameters ----------------------
args <- commandArgs(trailingOnly = TRUE)
@@ -35,6 +40,7 @@
species = args[5] #human or mouse
locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD
filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no)
+clonality_method = args[8]
# ---------------------- Data preperation ----------------------
@@ -217,7 +223,6 @@
useD = FALSE
cat("No D Genes in this species/locus")
}
-
print(paste("useD:", useD))
# ---------------------- merge with the frequency count ----------------------
@@ -453,84 +458,96 @@
if("Replicate" %in% colnames(inputdata)) #can only calculate clonality score when replicate information is available
{
- write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T)
-
- ClonalitySampleReplicatePrint <- function(dat){
- write.table(dat, paste("clonality_", unique(inputdata$Sample) , "_", unique(dat$Replicate), ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
- }
-
- clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,c("Sample", "Replicate")])
- #lapply(clonalityFrameSplit, FUN=ClonalitySampleReplicatePrint)
-
- ClonalitySamplePrint <- function(dat){
- write.table(dat, paste("clonality_", unique(inputdata$Sample) , ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
- }
-
- clonalityFrameSplit = split(clonalityFrame, f=clonalityFrame[,"Sample"])
- #lapply(clonalityFrameSplit, FUN=ClonalitySamplePrint)
-
- clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])
- clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])
- clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count
- clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])
- clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample")
-
- ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15')
- tcct = textConnection(ct)
- CT = read.table(tcct, sep="\t", header=TRUE)
- close(tcct)
- clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T)
- clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight
-
- ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonality_clonaltype")])
- ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")])
- clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads)
- ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads
-
- ReplicatePrint <- function(dat){
- write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+ if(clonality_method == "boyd"){
+ samples = split(clonalityFrame, clonalityFrame$Sample, drop=T)
+
+ for (sample in samples){
+ res = data.frame(paste=character(0))
+ sample_id = unique(sample$Sample)[[1]]
+ for(replicate in unique(sample$Replicate)){
+ tmp = sample[sample$Replicate == replicate,]
+ clone_table = data.frame(table(tmp$clonaltype))
+ clone_col_name = paste("V", replicate, sep="")
+ colnames(clone_table) = c("paste", clone_col_name)
+ res = merge(res, clone_table, by="paste", all=T)
+ }
+
+ res[is.na(res)] = 0
+ infer.result = infer.clonality(as.matrix(res[,2:ncol(res)]))
+
+ write.table(data.table(infer.result[[12]]), file=paste("lymphclon_clonality_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=F)
+
+ res$type = rowSums(res[,2:ncol(res)])
+
+ coincidence.table = data.frame(table(res$type))
+ colnames(coincidence.table) = c("Coincidence Type", "Raw Coincidence Freq")
+ write.table(coincidence.table, file=paste("lymphclon_coincidences_", sample_id, ".csv", sep=""), sep=",",quote=F,row.names=F,col.names=T)
+ }
+ } else {
+ write.table(clonalityFrame, "clonalityComplete.csv", sep=",",quote=F,row.names=F,col.names=T)
+
+ clonalFreq = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "clonaltype")])
+ clonalFreqCount = data.frame(data.table(clonalFreq)[, list(Count=.N), by=c("Sample", "Type")])
+ clonalFreqCount$realCount = clonalFreqCount$Type * clonalFreqCount$Count
+ clonalSum = data.frame(data.table(clonalFreqCount)[, list(Reads=sum(realCount)), by=c("Sample")])
+ clonalFreqCount = merge(clonalFreqCount, clonalSum, by.x="Sample", by.y="Sample")
+
+ ct = c('Type\tWeight\n2\t1\n3\t3\n4\t6\n5\t10\n6\t15')
+ tcct = textConnection(ct)
+ CT = read.table(tcct, sep="\t", header=TRUE)
+ close(tcct)
+ clonalFreqCount = merge(clonalFreqCount, CT, by.x="Type", by.y="Type", all.x=T)
+ clonalFreqCount$WeightedCount = clonalFreqCount$Count * clonalFreqCount$Weight
+
+ ReplicateReads = data.frame(data.table(clonalityFrame)[, list(Type=.N), by=c("Sample", "Replicate", "clonaltype")])
+ ReplicateReads = data.frame(data.table(ReplicateReads)[, list(Reads=.N), by=c("Sample", "Replicate")])
+ clonalFreqCount$Reads = as.numeric(clonalFreqCount$Reads)
+ ReplicateReads$squared = ReplicateReads$Reads * ReplicateReads$Reads
+
+ ReplicatePrint <- function(dat){
+ write.table(dat[-1], paste("ReplicateReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+ }
+
+ ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
+ lapply(ReplicateSplit, FUN=ReplicatePrint)
+
+ ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")])
+ clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T)
+
+ ReplicateSumPrint <- function(dat){
+ write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+ }
+
+ ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
+ lapply(ReplicateSumSplit, FUN=ReplicateSumPrint)
+
+ clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")])
+ clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T)
+ clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow
+ clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2)
+ clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1)
+
+ ClonalityScorePrint <- function(dat){
+ write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+ }
+
+ clonalityScore = clonalFreqCount[c("Sample", "Result")]
+ clonalityScore = unique(clonalityScore)
+
+ clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"])
+ lapply(clonalityScoreSplit, FUN=ClonalityScorePrint)
+
+ clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")]
+
+
+
+ ClonalityOverviewPrint <- function(dat){
+ write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
+ }
+
+ clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)
+ lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)
}
-
- ReplicateSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
- lapply(ReplicateSplit, FUN=ReplicatePrint)
-
- ReplicateReads = data.frame(data.table(ReplicateReads)[, list(ReadsSum=sum(as.numeric(Reads)), ReadsSquaredSum=sum(as.numeric(squared))), by=c("Sample")])
- clonalFreqCount = merge(clonalFreqCount, ReplicateReads, by.x="Sample", by.y="Sample", all.x=T)
-
-
- ReplicateSumPrint <- function(dat){
- write.table(dat[-1], paste("ReplicateSumReads_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
- }
-
- ReplicateSumSplit = split(ReplicateReads, f=ReplicateReads[,"Sample"])
- lapply(ReplicateSumSplit, FUN=ReplicateSumPrint)
-
- clonalFreqCountSum = data.frame(data.table(clonalFreqCount)[, list(Numerator=sum(WeightedCount, na.rm=T)), by=c("Sample")])
- clonalFreqCount = merge(clonalFreqCount, clonalFreqCountSum, by.x="Sample", by.y="Sample", all.x=T)
- clonalFreqCount$ReadsSum = as.numeric(clonalFreqCount$ReadsSum) #prevent integer overflow
- clonalFreqCount$Denominator = (((clonalFreqCount$ReadsSum * clonalFreqCount$ReadsSum) - clonalFreqCount$ReadsSquaredSum) / 2)
- clonalFreqCount$Result = (clonalFreqCount$Numerator + 1) / (clonalFreqCount$Denominator + 1)
-
- ClonalityScorePrint <- function(dat){
- write.table(dat$Result, paste("ClonalityScore_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
- }
-
- clonalityScore = clonalFreqCount[c("Sample", "Result")]
- clonalityScore = unique(clonalityScore)
-
- clonalityScoreSplit = split(clonalityScore, f=clonalityScore[,"Sample"])
- lapply(clonalityScoreSplit, FUN=ClonalityScorePrint)
-
- clonalityOverview = clonalFreqCount[c("Sample", "Type", "Count", "Weight", "WeightedCount")]
-
-
-
- ClonalityOverviewPrint <- function(dat){
- write.table(dat[-1], paste("ClonalityOverView_", unique(dat[1])[1,1] , ".csv", sep=""), sep=",",quote=F,na="-",row.names=F,col.names=F)
- }
-
- clonalityOverviewSplit = split(clonalityOverview, f=clonalityOverview$Sample)
- lapply(clonalityOverviewSplit, FUN=ClonalityOverviewPrint)
}
imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb")
diff -r 01f05258f672 -r f919965e8816 r_wrapper.sh
--- a/r_wrapper.sh Mon Feb 09 08:53:30 2015 -0500
+++ b/r_wrapper.sh Fri Mar 06 09:59:53 2015 -0500
@@ -7,6 +7,7 @@
species=$5
locus=$6
filterproductive=$7
+clonality_method=$8
dir="$(cd "$(dirname "$0")" && pwd)"
useD="false"
if grep -q "$species.*${locus}D" "$dir/genes.txt" ; then
@@ -22,9 +23,10 @@
useD="false"
fi
fi
+useD="false"
mkdir $3
cp $dir/genes.txt $outputDir
-Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive 2>&1
+Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive ${clonality_method} 2>&1
cp $dir/tabber.js $outputDir
cp $dir/style.css $outputDir
cp $dir/script.js $outputDir
@@ -89,34 +91,49 @@
hasReplicateColumn="$(if head -n 1 $inputFile | grep -q 'Replicate'; then echo 'Yes'; else echo 'No'; fi)"
echo "$hasReplicateColumn"
#if its a 'new' merged file with replicate info
-if [[ "$hasReplicateColumn" == "Yes" ]] ; then
+if [[ "$hasReplicateColumn" == "Yes" && "$clonalType" != "none" ]] ; then
echo "
" >> $outputFile
for sample in $samples; do
- clonalityScore="$(cat $outputDir/ClonalityScore_$sample.csv)"
- echo "
" >> $outputFile
- echo "Clonality Score: $clonalityScore |
" >> $outputFile
+ echo "${clonality_method}"
+ if [[ "${clonality_method}" == "old" ]] ; then
+ echo "in old"
+ clonalityScore="$(cat $outputDir/ClonalityScore_$sample.csv)"
+ echo "" >> $outputFile
+ echo "Clonality Score: $clonalityScore |
" >> $outputFile
- #replicate,reads,squared
- echo "Replicate ID | Number of Reads | Reads Squared | |
" >> $outputFile
- while IFS=, read replicate reads squared
- do
+ #replicate,reads,squared
+ echo "Replicate ID | Number of Reads | Reads Squared | |
" >> $outputFile
+ while IFS=, read replicate reads squared
+ do
+
+ echo "$replicate | $reads | $squared | |
" >> $outputFile
+ done < $outputDir/ReplicateReads_$sample.csv
- echo "$replicate | $reads | $squared | |
" >> $outputFile
- done < $outputDir/ReplicateReads_$sample.csv
-
- #sum of reads and reads squared
- while IFS=, read readsSum squaredSum
+ #sum of reads and reads squared
+ while IFS=, read readsSum squaredSum
+ do
+ echo "Sum | $readsSum | $squaredSum |
" >> $outputFile
+ done < $outputDir/ReplicateSumReads_$sample.csv
+
+ #overview
+ echo "Coincidence Type | Raw Coincidence Freq | Coincidence Weight | Coincidences, Weighted |
" >> $outputFile
+ while IFS=, read type count weight weightedCount
do
- echo "Sum | $readsSum | $squaredSum |
" >> $outputFile
- done < $outputDir/ReplicateSumReads_$sample.csv
-
- #overview
- echo "Coincidence Type | Raw Coincidence Freq | Coincidence Weight | Coincidences, Weighted |
" >> $outputFile
- while IFS=, read type count weight weightedCount
- do
- echo "$type | $count | $weight | $weightedCount |
" >> $outputFile
- done < $outputDir/ClonalityOverView_$sample.csv
- echo "
" >> $outputFile
+ echo "$type | $count | $weight | $weightedCount |
" >> $outputFile
+ done < $outputDir/ClonalityOverView_$sample.csv
+ echo "
" >> $outputFile
+ else
+ echo "in new"
+ clonalityScore="$(cat $outputDir/lymphclon_clonality_${sample}.csv)"
+ echo "
" >> $outputFile
+ echo "Lymphclon clonality score:
$clonalityScore
" >> $outputFile
+ echo "
" >> $outputFile
+ while IFS=, read type count
+ do
+ echo "$type | $count |
" >> $outputFile
+ done < $outputDir/lymphclon_coincidences_$sample.csv
+ echo "
" >> $outputFile
+ fi
done
echo "
" >> $outputFile
fi
diff -r 01f05258f672 -r f919965e8816 report_clonality_igg.xml
--- a/report_clonality_igg.xml Mon Feb 09 08:53:30 2015 -0500
+++ b/report_clonality_igg.xml Fri Mar 06 09:59:53 2015 -0500
@@ -2,14 +2,14 @@
#if $gene_selection.source == "imgtdb"
- r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive
+ r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive $clonality_method
#else
- r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive
+ r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive $clonality_method
#end if
-
+
@@ -89,6 +89,12 @@
+
+
+
+
+
+