# HG changeset patch
# User davidvanzessen
# Date 1432651071 14400
# Node ID 45554fd15511dcaad82141f70489b82bd90a68ad
# Parent 5ab17bdf2530f9b4f8914933a3e9b0dffa88e46b
Uploaded
diff -r 5ab17bdf2530 -r 45554fd15511 RScript.r
--- a/RScript.r Fri May 22 09:06:04 2015 -0400
+++ b/RScript.r Tue May 26 10:37:51 2015 -0400
@@ -136,10 +136,10 @@
patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence)
}
- scatterplot_data_columns = c("Patient", "Sample", "Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene")
+ scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns])
- scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),]
- scatterplot_data$type = factor(x="In one", levels=c("In one", "In Both"))
+ scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
+ scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both"))
scatterplot_data$on = onShort
patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge")
@@ -159,8 +159,8 @@
J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="")
#both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold
both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold
- one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge))
- two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge))
+ one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge))
+ two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge))
read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count))
read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count))
res1 = append(res1, sum(one))
@@ -187,9 +187,14 @@
if(nrow(scatterplot_locus_data) > 0){
scatterplot_locus_data$Rearrangement = product[iter, titleIndex]
}
- in_two = (scatterplot_locus_data$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x)
- if(any(in_two)){
- scatterplot_locus_data[in_two,]$type = "In Both"
+ in_both = (scatterplot_locus_data$merge %in% patientMerge[both,]$merge)
+ if(any(in_both)){
+ scatterplot_locus_data[in_both,]$type = "In Both"
+ }
+ in_one = (scatterplot_locus_data$merge %in% patient1$merge)
+ not_in_one = !in_one
+ if(any(not_in_one)){
+ scatterplot_locus_data[not_in_one,]$type = twoSample
}
if(type == "single"){
single_patients <<- rbind(single_patients, scatterplot_locus_data)
@@ -197,10 +202,10 @@
p = NULL
if(nrow(scatterplot_locus_data) != 0){
if(on == "normalized_read_count"){
- scales = 10^(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
- p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales)
+ scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))
+ p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000))
} else {
- p = ggplot(scatterplot_locus_data, aes(type, Frequency))
+ p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100))
}
p = p + geom_point(aes(colour=type), position="jitter")
p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex]))
@@ -343,9 +348,9 @@
patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony])
- scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene")
+ scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge")
scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns])
- scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),]
+ scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),]
scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple"))
res1 = vector()
@@ -369,13 +374,13 @@
#all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold)
all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold)
- one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$Clone_Sequence.x %in% patientMerge[all,]$merge))
- one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$Clone_Sequence.x %in% patientMerge[all,]$merge))
- two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$Clone_Sequence.x %in% patientMerge[all,]$merge))
+ one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge))
+ one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge))
+ two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge))
- one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient1$Clone_Sequence %in% patientMerge12[one_two,]$merge) & !(patient1$Clone_Sequence %in% patientMerge13[one_three,]$merge))
- two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient2$Clone_Sequence %in% patientMerge12[one_two,]$merge) & !(patient2$Clone_Sequence %in% patientMerge23[two_three,]$merge))
- three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient3$Clone_Sequence %in% patientMerge13[one_three,]$merge) & !(patient3$Clone_Sequence %in% patientMerge23[two_three,]$merge))
+ one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge))
+ two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge))
+ three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge))
read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x))
read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y))
@@ -427,11 +432,11 @@
}
} else { #scatterplot data
scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),]
- in_two = (scatterplot_locus_data$Clone_Sequence %in% patientMerge12[one_two,]$Clone_Sequence.x) | (scatterplot_locus_data$Clone_Sequence %in% patientMerge13[one_three,]$Clone_Sequence.x) | (scatterplot_locus_data$Clone_Sequence %in% patientMerge23[two_three,]$Clone_Sequence.x)
+ in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge)
if(sum(in_two) > 0){
scatterplot_locus_data[in_two,]$type = "In two"
}
- in_three = (scatterplot_locus_data$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x)
+ in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge)
if(sum(in_three)> 0){
scatterplot_locus_data[in_three,]$type = "In three"
}
diff -r 5ab17bdf2530 -r 45554fd15511 wrapper.sh
--- a/wrapper.sh Fri May 22 09:06:04 2015 -0400
+++ b/wrapper.sh Tue May 26 10:37:51 2015 -0400
@@ -52,7 +52,7 @@
echo "
" >> "$html"
echo "Ig/TCR gene rearrangement type | Proximal gene segment | Distal gene segment | Cut off value | Number of sequences ${patient}_Both | Number of sequences_$sample1 | Read Count $sample1 | Number of sequences_$sample2 | Read Count $sample2 | Sum number of sequences $patient | Percentage of sequences ${patient}_both | " >> "$html"
echo "" >> "$html"
- scatterplot_tab=""
+ scatterplot_tab="
" >> "$html"
- echo "${scatterplot_tab}
" >> "$html"
+ echo "${scatterplot_tab}
" >> "$html"
tail -n+2 ${patient}_reads.txt | sed "s/>//" > tmp.txt
echo "" >> "$html"
@@ -104,7 +104,7 @@
echo "
" >> "$html"
echo "Ig/TCR gene rearrangement type | Proximal gene segment | Distal gene segment | Cut off value | Number of sequences ${patient}_Both | Number of sequences_$sample1 | Read Count $sample1 | Number of sequences_$sample2 | Read Count $sample2 | Sum number of sequences $patient | Percentage of sequences ${patient}_both | " >> "$html"
echo "" >> "$html"
- scatterplot_tab=""
+ scatterplot_tab="
" >> "$html"
- echo "${scatterplot_tab}
" >> "$html"
+ echo "${scatterplot_tab}
" >> "$html"
echo "" >> "$html"
echo "" >> "$html"
echo "