Mercurial > repos > davidvanzessen > clonal_sequences_in_paired_samples
changeset 30:45554fd15511 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 26 May 2015 10:37:51 -0400 |
parents | 5ab17bdf2530 |
children | ce8bd23d0335 |
files | RScript.r wrapper.sh |
diffstat | 2 files changed, 32 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/RScript.r Fri May 22 09:06:04 2015 -0400 +++ b/RScript.r Tue May 26 10:37:51 2015 -0400 @@ -136,10 +136,10 @@ patient2$merge = paste(patient2$V_Segment_Major_Gene, patient2$J_Segment_Major_Gene, patient2$CDR3_Sense_Sequence) } - scatterplot_data_columns = c("Patient", "Sample", "Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene") + scatterplot_data_columns = c("Patient", "Sample", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns]) - scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),] - scatterplot_data$type = factor(x="In one", levels=c("In one", "In Both")) + scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] + scatterplot_data$type = factor(x=oneSample, levels=c(oneSample, twoSample, "In Both")) scatterplot_data$on = onShort patientMerge = merge(patient1, patient2, by.x="merge", by.y="merge") @@ -159,8 +159,8 @@ J_Segment = paste(".*", as.character(product[iter,J_SegmentIndex]), ".*", sep="") #both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold) #both higher than threshold both = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) #highest of both is higher than threshold - one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[both,]$merge)) - two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[both,]$merge)) + one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[both,]$merge)) + two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[both,]$merge)) read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count)) read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count)) res1 = append(res1, sum(one)) @@ -187,9 +187,14 @@ if(nrow(scatterplot_locus_data) > 0){ scatterplot_locus_data$Rearrangement = product[iter, titleIndex] } - in_two = (scatterplot_locus_data$Clone_Sequence %in% patientMerge[both,]$Clone_Sequence.x) - if(any(in_two)){ - scatterplot_locus_data[in_two,]$type = "In Both" + in_both = (scatterplot_locus_data$merge %in% patientMerge[both,]$merge) + if(any(in_both)){ + scatterplot_locus_data[in_both,]$type = "In Both" + } + in_one = (scatterplot_locus_data$merge %in% patient1$merge) + not_in_one = !in_one + if(any(not_in_one)){ + scatterplot_locus_data[not_in_one,]$type = twoSample } if(type == "single"){ single_patients <<- rbind(single_patients, scatterplot_locus_data) @@ -197,10 +202,10 @@ p = NULL if(nrow(scatterplot_locus_data) != 0){ if(on == "normalized_read_count"){ - scales = 10^(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) - p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + scales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count)))) + p = ggplot(scatterplot_locus_data, aes(type, normalized_read_count)) + scale_y_log10(breaks=scales,labels=scales) + expand_limits(y=c(0,1000000)) } else { - p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + p = ggplot(scatterplot_locus_data, aes(type, Frequency)) + scale_y_continuous(limits = c(0, 100)) + expand_limits(y=c(0,100)) } p = p + geom_point(aes(colour=type), position="jitter") p = p + xlab("In one or both samples") + ylab(onShort) + ggtitle(paste(patient1[1,patientIndex], patient1[1,sampleIndex], patient2[1,sampleIndex], onShort, product[iter, titleIndex])) @@ -343,9 +348,9 @@ patientMerge23$thresholdValue = pmax(patientMerge23[,onx], patientMerge23[,ony]) - scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene") + scatterplot_data_columns = c("Clone_Sequence", "Frequency", "normalized_read_count", "V_Segment_Major_Gene", "J_Segment_Major_Gene", "merge") scatterplot_data = rbind(patient1[,scatterplot_data_columns], patient2[,scatterplot_data_columns], patient3[,scatterplot_data_columns]) - scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$Clone_Sequence),] + scatterplot_data = scatterplot_data[!duplicated(scatterplot_data$merge),] scatterplot_data$type = factor(x="In one", levels=c("In one", "In two", "In three", "In multiple")) res1 = vector() @@ -369,13 +374,13 @@ #all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge[,onx] > threshhold & patientMerge[,ony] > threshhold & patientMerge[,onz] > threshhold) all = (grepl(V_Segment, patientMerge$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge$J_Segment_Major_Gene.x) & patientMerge$thresholdValue > threshhold) - one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$Clone_Sequence.x %in% patientMerge[all,]$merge)) - one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$Clone_Sequence.x %in% patientMerge[all,]$merge)) - two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$Clone_Sequence.x %in% patientMerge[all,]$merge)) + one_two = (grepl(V_Segment, patientMerge12$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge12$J_Segment_Major_Gene.x) & patientMerge12$thresholdValue > threshhold & !(patientMerge12$merge %in% patientMerge[all,]$merge)) + one_three = (grepl(V_Segment, patientMerge13$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge13$J_Segment_Major_Gene.x) & patientMerge13$thresholdValue > threshhold & !(patientMerge13$merge %in% patientMerge[all,]$merge)) + two_three = (grepl(V_Segment, patientMerge23$V_Segment_Major_Gene.x) & grepl(J_Segment, patientMerge23$J_Segment_Major_Gene.x) & patientMerge23$thresholdValue > threshhold & !(patientMerge23$merge %in% patientMerge[all,]$merge)) - one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient1$Clone_Sequence %in% patientMerge12[one_two,]$merge) & !(patient1$Clone_Sequence %in% patientMerge13[one_three,]$merge)) - two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient2$Clone_Sequence %in% patientMerge12[one_two,]$merge) & !(patient2$Clone_Sequence %in% patientMerge23[two_three,]$merge)) - three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$Clone_Sequence %in% patientMerge[all,]$merge) & !(patient3$Clone_Sequence %in% patientMerge13[one_three,]$merge) & !(patient3$Clone_Sequence %in% patientMerge23[two_three,]$merge)) + one = (grepl(V_Segment, patient1$V_Segment_Major_Gene) & grepl(J_Segment, patient1$J_Segment_Major_Gene) & patient1[,on] > threshhold & !(patient1$merge %in% patientMerge[all,]$merge) & !(patient1$merge %in% patientMerge12[one_two,]$merge) & !(patient1$merge %in% patientMerge13[one_three,]$merge)) + two = (grepl(V_Segment, patient2$V_Segment_Major_Gene) & grepl(J_Segment, patient2$J_Segment_Major_Gene) & patient2[,on] > threshhold & !(patient2$merge %in% patientMerge[all,]$merge) & !(patient2$merge %in% patientMerge12[one_two,]$merge) & !(patient2$merge %in% patientMerge23[two_three,]$merge)) + three = (grepl(V_Segment, patient3$V_Segment_Major_Gene) & grepl(J_Segment, patient3$J_Segment_Major_Gene) & patient3[,on] > threshhold & !(patient3$merge %in% patientMerge[all,]$merge) & !(patient3$merge %in% patientMerge13[one_three,]$merge) & !(patient3$merge %in% patientMerge23[two_three,]$merge)) read1Count = append(read1Count, sum(patient1[one,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.x)) read2Count = append(read2Count, sum(patient2[two,]$normalized_read_count) + sum(patientMerge[all,]$normalized_read_count.y)) @@ -427,11 +432,11 @@ } } else { #scatterplot data scatterplot_locus_data = scatterplot_data[grepl(V_Segment, scatterplot_data$V_Segment_Major_Gene) & grepl(J_Segment, scatterplot_data$J_Segment_Major_Gene),] - in_two = (scatterplot_locus_data$Clone_Sequence %in% patientMerge12[one_two,]$Clone_Sequence.x) | (scatterplot_locus_data$Clone_Sequence %in% patientMerge13[one_three,]$Clone_Sequence.x) | (scatterplot_locus_data$Clone_Sequence %in% patientMerge23[two_three,]$Clone_Sequence.x) + in_two = (scatterplot_locus_data$merge %in% patientMerge12[one_two,]$merge) | (scatterplot_locus_data$merge %in% patientMerge13[one_three,]$merge) | (scatterplot_locus_data$merge %in% patientMerge23[two_three,]$merge) if(sum(in_two) > 0){ scatterplot_locus_data[in_two,]$type = "In two" } - in_three = (scatterplot_locus_data$Clone_Sequence %in% patientMerge[all,]$Clone_Sequence.x) + in_three = (scatterplot_locus_data$merge %in% patientMerge[all,]$merge) if(sum(in_three)> 0){ scatterplot_locus_data[in_three,]$type = "In three" }
--- a/wrapper.sh Fri May 22 09:06:04 2015 -0400 +++ b/wrapper.sh Tue May 26 10:37:51 2015 -0400 @@ -52,7 +52,7 @@ echo "<table border = 1 class='result_table summary_table' id='summary_table_${patient}_freq'>" >> "$html" echo "<thead><th>Ig/TCR gene rearrangement type</th><th>Proximal gene segment</th><th>Distal gene segment</th><th>Cut off value</th><th>Number of sequences ${patient}_Both</th><th>Number of sequences_$sample1</th><th>Read Count $sample1</th><th>Number of sequences_$sample2</th><th>Read Count $sample2</th><th>Sum number of sequences $patient</th><th>Percentage of sequences ${patient}_both</th></thead>" >> "$html" echo "<tbody>" >> "$html" - scatterplot_tab="<div class='tabbertab' title='Scatter Plots Reads'>" + scatterplot_tab="<div class='tabbertab' title='Scatter Plots Frequency'><table border='0'><tr>" while read locus j_segment v_segment cut_off_value both one read_count1 two read_count2 sum percent locusreadsum1 locusreadsum2 do if [ "$locus" != "$oldLocus" ] ; then @@ -86,7 +86,7 @@ echo "</tr>" >> "$html" oldLocus="$locus" if [ "${cut_off_value}" == "0" ] ; then - scatterplot_tab="${scatterplot_tab}<img src='${patient}_${sample1}_${sample2}_freq_${locus}_scatter.png' /><br />" + scatterplot_tab="${scatterplot_tab}<td><img src='${patient}_${sample1}_${sample2}_freq_${locus}_scatter.png' /></td>" fi done < tmp.txt echo "</tbody></table>" >> "$html" @@ -96,7 +96,7 @@ echo "<a href='${patient}_freq.png'><img src='${patient}_freq.png' width='1280' height='720' /></a><br />" >> "$html" echo "<a href='${patient}_freq_both.png'><img src='${patient}_freq_both.png' width='1280' height='720' /></a><br />" >> "$html" echo "<a href='${patient}_percent_freq.png'><img src='${patient}_percent_freq.png' width='1280' height='720' /></a></div>" >> "$html" - echo "${scatterplot_tab}</div>" >> "$html" + echo "${scatterplot_tab}</tr></table></div>" >> "$html" tail -n+2 ${patient}_reads.txt | sed "s/>//" > tmp.txt echo "<div class='tabbertab' title='Data reads'>" >> "$html" @@ -104,7 +104,7 @@ echo "<table border = 1 class='result_table summary_table' id='summary_table_${patient}_reads'>" >> "$html" echo "<thead><th>Ig/TCR gene rearrangement type</th><th>Proximal gene segment</th><th>Distal gene segment</th><th>Cut off value</th><th>Number of sequences ${patient}_Both</th><th>Number of sequences_$sample1</th><th>Read Count $sample1</th><th>Number of sequences_$sample2</th><th>Read Count $sample2</th><th>Sum number of sequences $patient</th><th>Percentage of sequences ${patient}_both</th></thead>" >> "$html" echo "<tbody>" >> "$html" - scatterplot_tab="<div class='tabbertab' title='Scatter Plots Frequency'>" + scatterplot_tab="<div class='tabbertab' title='Scatter Plots Reads'><table border='0'><tr>" while read locus j_segment v_segment cut_off_value both one read_count1 two read_count2 sum percent locusreadsum1 locusreadsum2 do if [ "$locus" != "$oldLocus" ] ; then @@ -138,7 +138,7 @@ echo "</tr>" >> "$html" oldLocus="$locus" if [ "${cut_off_value}" == "0" ] ; then - scatterplot_tab="${scatterplot_tab}<img src='${patient}_${sample1}_${sample2}_reads_${locus}_scatter.png' /><br />" + scatterplot_tab="${scatterplot_tab}<td><img src='${patient}_${sample1}_${sample2}_reads_${locus}_scatter.png' /></td>" fi done < tmp.txt echo "</tbody></table>" >> "$html" @@ -148,7 +148,7 @@ echo "<a href='${patient}_reads.png'><img src='${patient}_reads.png' width='1280' height='720' /></a><br />" >> "$html" echo "<a href='${patient}_reads_both.png'><img src='${patient}_reads_both.png' width='1280' height='720' /></a><br />" >> "$html" echo "<a href='${patient}_percent_reads.png'><img src='${patient}_percent_reads.png' width='1280' height='720' /></a></div>" >> "$html" - echo "${scatterplot_tab}</div>" >> "$html" + echo "${scatterplot_tab}</tr></table></div>" >> "$html" echo "</div>" >> "$html" echo "</div>" >> "$html" echo "</html>" >> "$html"