# HG changeset patch
# User davidvanzessen
# Date 1418222956 18000
# Node ID eb5b569b44dd76f88c1221e8802d80b830d9717d
# Parent bc4612998d50dd34d79bc9b615273701f8988ddb
Uploaded
diff -r bc4612998d50 -r eb5b569b44dd ALL.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/ALL.xml Wed Dec 10 09:49:16 2014 -0500
@@ -0,0 +1,42 @@
+
+ Comparison of clonal sequences in paired samples
+
+ wrapper.sh $in_file $out_file $out_file.files_path $min_freq $min_cells
+
+
+
+
+
+
+
+
+
+
+Takes a tabular file as input, it needs to following columns:
+
++----------------------------------+----------------------------------------------+
+| **Column name** | **Column contents** |
++----------------------------------+----------------------------------------------+
+| Patient | The patient ID |
++----------------------------------+----------------------------------------------+
+| Sample | The Sample ID, one, two or three per Patient |
++----------------------------------+----------------------------------------------+
+| Cell_Count | The cell count within a sample/loci |
++----------------------------------+----------------------------------------------+
+| Clone_Molocule_Count_From_Spikes | The count of a clone |
++----------------------------------+----------------------------------------------+
+| Log10_Frequency | The frequency of a clone in log10 |
++----------------------------------+----------------------------------------------+
+| J_Segment_Major_Gene | The J Gene of this clone |
++----------------------------------+----------------------------------------------+
+| V_Segment_Major_Gene | The V Gene of this clone |
++----------------------------------+----------------------------------------------+
+| Clone_Sequence | The entire sequence |
++----------------------------------+----------------------------------------------+
+| CDR3_Sense_Sequence | The CDR3 sequence region. |
++----------------------------------+----------------------------------------------+
+
+
+And generate a detailed HTML report on the sequences found in indiviual samples and in both samples.
+
+
diff -r bc4612998d50 -r eb5b569b44dd CLL.xml
--- a/CLL.xml Mon Oct 06 05:57:55 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-
- Comparison of clonal sequences in paired samples
-
- wrapper.sh $in_file $out_file $out_file.files_path $min_freq $min_cells
-
-
-
-
-
-
-
-
-
-
- stuff and words
-
-
diff -r bc4612998d50 -r eb5b569b44dd RScript.r
--- a/RScript.r Mon Oct 06 05:57:55 2014 -0400
+++ b/RScript.r Wed Dec 10 09:49:16 2014 -0500
@@ -17,35 +17,51 @@
cat("
Reading input |
", file=logfile, append=T)
dat = read.table(inFile, header=T, sep="\t", dec=",", fill=T, stringsAsFactors=F)
dat = dat[!is.na(dat$Patient),]
+dat = dat[!duplicated(dat$Clone_Sequence), ]
setwd(outDir)
cat("Selecting first V/J Genes |
", file=logfile, append=T)
dat$V_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$V_Segment_Major_Gene), "; "), "[[", 1)))
dat$J_Segment_Major_Gene = as.factor(as.character(lapply(strsplit(as.character(dat$J_Segment_Major_Gene), "; "), "[[", 1)))
-str(dat)
+dat$Frequency = ((10^dat$Log10_Frequency)*100)
+
cat("Deduplication |
", file=logfile, append=T)
#dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
-most.common = function(x){
- ux = unique(x)
+most.common = function(x, ret="V"){
+ past = paste(x$V_Segment_Major_Gene, x$J_Segment_Major_Gene, sep=";")
+ ux = unique(past)
if(length(ux) > 1){
- xtdf = data.frame(table(x))
- return(xtdf$Var1[which.max(xtdf$Freq)])
+ xtdf = data.frame(table(past))
#print(xtdf)
+ res = unlist(strsplit(as.character(xtdf$past[which.max(xtdf$Freq)]), ";"))
+ #print(res)
+ if(ret == "V"){
+ return(res[1])
+ } else {
+ return(res[2])
+ }
}
- return(unique(x))
+
+ if(ret == "V"){
+ return(unique(x$V_Segment_Major_Gene))
+ } else {
+ return(unique(x$J_Segment_Major_Gene))
+ }
}
-dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), V_Segment_Major_Gene=most.common(.SD$V_Segment_Major_Gene), J_Segment_Major_Gene=most.common(.SD$J_Segment_Major_Gene), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "CDR3_Sense_Sequence")])
+dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), V_Segment_Major_Gene= as.character(most.common(.SD, ret="V")), J_Segment_Major_Gene= as.character(most.common(.SD, ret="J")), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Frequency=sum(.SD$Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "CDR3_Sense_Sequence")])
+dat = data.frame(data.table(dat)[, list(Patient=unique(.SD$Patient), Clone_Molecule_Count_From_Spikes=sum(.SD$Clone_Molecule_Count_From_Spikes), Log10_Frequency=sum(.SD$Log10_Frequency), Frequency=sum(.SD$Frequency), Total_Read_Count=sum(.SD$Total_Read_Count), Related_to_leukemia_clone=any(.SD$Related_to_leukemia_clone)), by=c("Sample", "Cell_Count", "J_Segment_Major_Gene", "V_Segment_Major_Gene", "CDR3_Sense_Sequence")])
cat("Calculating Frequency |
", file=logfile, append=T)
-dat$Frequency = ((10^dat$Log10_Frequency)*100)
+
dat = dat[dat$Frequency >= min_freq,]
-cat("Normalizing cell count to 1.000.000 |
", file=logfile, append=T)
-dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * 1000000 / 2)
+#cat("Normalizing cell count to 1.000.000 |
", file=logfile, append=T)
+#dat$normalized_read_count = round(dat$Clone_Molecule_Count_From_Spikes / dat$Cell_Count * 1000000 / 2)
+dat$normalized_read_count = dat$Clone_Molecule_Count_From_Spikes
dat = dat[dat$normalized_read_count >= min_cells,]
dat$paste = paste(dat$Sample, dat$V_Segment_Major_Gene, dat$J_Segment_Major_Gene, dat$CDR3_Sense_Sequence)
triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]
@@ -100,9 +116,12 @@
}
cat(paste("", patient, " |
", sep=""), file=logfile, append=T)
- patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
- patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
+ #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
+ #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
+ patient1$merge = paste(patient1$CDR3_Sense_Sequence)
+ patient2$merge = paste(patient2$CDR3_Sense_Sequence)
+ #patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
res1 = vector()
res2 = vector()
@@ -233,9 +252,13 @@
twoSample = paste(patient2[1,sampleIndex], sep="")
threeSample = paste(patient3[1,sampleIndex], sep="")
- patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
- patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
- patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
+ #patient1$merge = paste(patient1$V_Segment_Major_Gene, patient1$J_Segment_Major_Gene, patient1$CDR3_Sense_Sequence)
+ #patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
+ #patient3$merge = paste(patient3$V_Segment_Major_Gene, patient3$J_Segment_Major_Gene, patient3$CDR3_Sense_Sequence)
+
+ patient1$merge = paste(patient1$CDR3_Sense_Sequence)
+ patient2$merge = paste(patient2$CDR3_Sense_Sequence)
+ patient3$merge = paste(patient3$CDR3_Sense_Sequence)
patientMerge = merge(patient1, patient2, by="merge")
patientMerge = merge(patientMerge, patient3, by="merge")
@@ -271,19 +294,19 @@
if(threshhold != 0){
if(sum(one) > 0){
dfOne = patient1[one,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
- colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
+ colnames(dfOne) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "CDR3 Sequence", "Related_to_leukemia_clone")
filenameOne = paste(label1, "_", product[iter, titleIndex], "_", threshhold, sep="")
write.table(dfOne, file=paste(filenameOne, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
}
if(sum(two) > 0){
dfTwo = patient2[two,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
- colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
+ colnames(dfTwo) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "CDR3 Sequence", "Related_to_leukemia_clone")
filenameTwo = paste(label2, "_", product[iter, titleIndex], "_", threshhold, sep="")
write.table(dfTwo, file=paste(filenameTwo, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
}
if(sum(three) > 0){
dfThree = patient3[three,c("V_Segment_Major_Gene", "J_Segment_Major_Gene", "normalized_read_count", "Frequency", "CDR3_Sense_Sequence", "Related_to_leukemia_clone")]
- colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "Sequence", "Related_to_leukemia_clone")
+ colnames(dfThree) = c("Proximal segment", "Distal segment", "normalized_read_count", "Frequency", "CDR3 Sequence", "Related_to_leukemia_clone")
filenameThree = paste(label3, "_", product[iter, titleIndex], "_", threshhold, sep="")
write.table(dfThree, file=paste(filenameThree, ".txt", sep=""), quote=F, sep="\t", dec=",", row.names=F, col.names=T)
}
diff -r bc4612998d50 -r eb5b569b44dd script.js
--- a/script.js Mon Oct 06 05:57:55 2014 -0400
+++ b/script.js Wed Dec 10 09:49:16 2014 -0500
@@ -7,7 +7,11 @@
var tr = document.createElement('tr');
tr.className = "evenrowcolor";
var cells = lines[0].split("\t");
+ var cdr3column = 0;
for(var a = 0;a < cells.length;++a){
+ if(cells[a] == "CDR3 Sequence" || cells[a] == "CDR3_Sense_Sequence"){
+ cdr3column = a;
+ }
var td = document.createElement('td');
td.appendChild(document.createTextNode(cells[a]));
tr.appendChild(td);
@@ -16,7 +20,6 @@
tbl.appendChild(thead);
var tbdy = document.createElement('tbody');
-
for(var a = 1;a < lines.length;++a){
tr = document.createElement('tr');
var cells = lines[a].split("\t");
@@ -26,6 +29,9 @@
for(var b = 0;b < cells.length;++b){
td = document.createElement('td');
td.appendChild(document.createTextNode(cells[b]));
+ if(b == cdr3column){
+ td.setAttribute('style', 'text-align:right');
+ }
tr.appendChild(td)
}
if(a % 2 == 0){
@@ -40,6 +46,7 @@
}
function loadfile(file, patient, type){
+ patient = patient.replace(".", "\\.");
$('#hidden_div').load(file, function(){
$('#result_div_' + patient + '_' + type).html(tsvToTable($('#hidden_div').html()));
$('#result_div_' + patient + '_' + type + ' tr').hover(function() {