# HG changeset patch # User davidvanzessen # Date 1441974378 14400 # Node ID 623bbe972363ddafd7fd977f012d9cf4aa635dda # Parent d592dab2fca14a502ee559edbb9008501c3bb913 Uploaded diff -r d592dab2fca1 -r 623bbe972363 RScript.r --- a/RScript.r Mon Aug 31 09:28:38 2015 -0400 +++ b/RScript.r Fri Sep 11 08:26:18 2015 -0400 @@ -162,88 +162,100 @@ patient1.fuzzy$merge = paste(patient1.fuzzy$locus_V, patient1.fuzzy$locus_J, patient1.fuzzy$CDR3_Sense_Sequence) patient2.fuzzy$merge = paste(patient2.fuzzy$locus_V, patient2.fuzzy$locus_J, patient2.fuzzy$CDR3_Sense_Sequence) - merge.freq.table = data.frame(table(c(patient1.fuzzy$merge, patient2.fuzzy$merge))) + merge.freq.table = data.frame(table(c(patient1.fuzzy[!duplicated(patient1.fuzzy$merge),"merge"], patient2.fuzzy[!duplicated(patient2.fuzzy$merge),"merge"]))) merge.freq.table.gt.1 = merge.freq.table[merge.freq.table$Freq > 1,] patient1.fuzzy = patient1.fuzzy[patient1.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] patient2.fuzzy = patient2.fuzzy[patient2.fuzzy$merge %in% merge.freq.table.gt.1$Var1,] - while(nrow(patient1.fuzzy) > 0 & nrow(patient2.fuzzy) > 0){ - current.merge.1 = patient1.fuzzy[1,"merge"] - current.clone.seq.1 = patient1.fuzzy[1,"Clone_Sequence"] - current.merge.in.2 = patient2.fuzzy[patient2.fuzzy$merge == current.merge.1,] - - #agrep/adist the two samples - agrep.match = agrep(current.clone.seq.1, current.merge.in.2$Clone_Sequence, max.distance = 9, costs=list(insertions=1, deletions=1, substitutions=10)) + patient.fuzzy = rbind(patient1.fuzzy, patient2.fuzzy) + patient.fuzzy = patient.fuzzy[order(nchar(patient.fuzzy$Clone_Sequence)),] + + while(nrow(patient.fuzzy) > 1){ + first.merge = patient.fuzzy[1,"merge"] + first.clone.sequence = patient.fuzzy[1,"Clone_Sequence"] + + merge.filter = first.merge == patient.fuzzy$merge + + match.filter = merge.filter & grepl(first.clone.sequence, patient.fuzzy$Clone_Sequence) - - if(length(agrep.match) == 1){ - current.clone.seq.2 = patient2.fuzzy[agrep.match,"Clone_Sequence"] - patientMerge.new.row = data.frame(merge=current.clone.seq.1, - min_cell_paste.x=patient1.fuzzy[1,"min_cell_paste"], - Patient.x=patient1.fuzzy[1,"Patient"], - Receptor.x=patient1.fuzzy[1,"Receptor"], - Sample.x=patient1.fuzzy[1,"Sample"], - Cell_Count.x=patient1.fuzzy[1,"Cell_Count"], - Clone_Molecule_Count_From_Spikes.x=patient1.fuzzy[1,"Clone_Molecule_Count_From_Spikes"], - Log10_Frequency.x=patient1.fuzzy[1,"Log10_Frequency"], - Total_Read_Count.x=patient1.fuzzy[1,"Total_Read_Count"], - dsPerM.x=patient1.fuzzy[1,"dsPerM"], - J_Segment_Major_Gene.x=patient1.fuzzy[1,"J_Segment_Major_Gene"], - V_Segment_Major_Gene.x=patient1.fuzzy[1,"V_Segment_Major_Gene"], - Clone_Sequence.x=patient1.fuzzy[1,"Clone_Sequence"], - CDR3_Sense_Sequence.x=patient1.fuzzy[1,"CDR3_Sense_Sequence"], - Related_to_leukemia_clone.x=patient1.fuzzy[1,"Related_to_leukemia_clone"], - Frequency.x=patient1.fuzzy[1,"Frequency"], - locus_V.x=patient1.fuzzy[1,"locus_V"], - locus_J.x=patient1.fuzzy[1,"locus_J"], - min_cell_count.x=patient1.fuzzy[1,"min_cell_count"], - normalized_read_count.x=patient1.fuzzy[1,"normalized_read_count"], - paste.x=patient1.fuzzy[1,"paste"], - min_cell_paste.y=patient2.fuzzy[agrep.match,"min_cell_paste"], - Patient.y=patient2.fuzzy[agrep.match,"Patient"], - Receptor.y=patient2.fuzzy[agrep.match,"Receptor"], - Sample.y=patient2.fuzzy[agrep.match,"Sample"], - Cell_Count.y=patient2.fuzzy[agrep.match,"Cell_Count"], - Clone_Molecule_Count_From_Spikes.y=patient2.fuzzy[agrep.match,"Clone_Molecule_Count_From_Spikes"], - Log10_Frequency.y=patient2.fuzzy[agrep.match,"Log10_Frequency"], - Total_Read_Count.y=patient2.fuzzy[agrep.match,"Total_Read_Count"], - dsPerM.y=patient2.fuzzy[agrep.match,"dsPerM"], - J_Segment_Major_Gene.y=patient2.fuzzy[agrep.match,"J_Segment_Major_Gene"], - V_Segment_Major_Gene.y=patient2.fuzzy[agrep.match,"V_Segment_Major_Gene"], - Clone_Sequence.y=patient2.fuzzy[agrep.match,"Clone_Sequence"], - CDR3_Sense_Sequence.y=patient2.fuzzy[agrep.match,"CDR3_Sense_Sequence"], - Related_to_leukemia_clone.y=patient2.fuzzy[agrep.match,"Related_to_leukemia_clone"], - Frequency.y=patient2.fuzzy[agrep.match,"Frequency"], - locus_V.y=patient2.fuzzy[agrep.match,"locus_V"], - locus_J.y=patient2.fuzzy[agrep.match,"locus_J"], - min_cell_count.y=patient2.fuzzy[agrep.match,"min_cell_count"], - normalized_read_count.y=patient2.fuzzy[agrep.match,"normalized_read_count"], - paste.y=patient2.fuzzy[agrep.match,"paste"]) - #add to patientMerge - patientMerge = rbind(patientMerge, patientMerge.new.row) - #remove from patient*.fuzzy + if(sum(match.filter) == 2){ + second.match = which(match.filter)[2] + second.clone.sequence = patient.fuzzy[second.match,"Clone_Sequence"] + first.sample = patient.fuzzy[1,"Sample"] + second.sample = patient.fuzzy[second.match,"Sample"] - - #remove the fuzzy merged clone sequences from the original datasets - patient1 = patient1[patient1$Clone_Sequence != patient1.fuzzy[1,"Clone_Sequence"],] - patient2 = patient2[patient2$Clone_Sequence != patient2.fuzzy[agrep.match,"Clone_Sequence"],] - - scatterplot_data = scatterplot_data[scatterplot_data$merge != current.clone.seq.2,] - - patient2.fuzzy <<- patient2.fuzzy[-c(agrep.match),] - - - } else if (length(agrep.match) > 1){ - #multiple matches, whatdo? - cat(paste("