comparison report_clonality/RScript.r @ 48:d08dfc8d5225 draft

Uploaded
author davidvanzessen
date Wed, 27 Jan 2016 10:36:35 -0500
parents d97e1421aa86
children 2a79f9adf89b
comparison
equal deleted inserted replaced
47:d97e1421aa86 48:d08dfc8d5225
60 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) 60 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene)
61 61
62 #filter uniques 62 #filter uniques
63 inputdata.removed = inputdata[NULL,] 63 inputdata.removed = inputdata[NULL,]
64 64
65 if(filter_uniques == "yes" && c("CDR1.Seq", "CDR2.Seq", "CDR3.Seq", "FR1.IMGT", "FR2.IMGT", "FR3.IMGT") %in% names(inputdata)){ 65 filter_uniques = filter_uniques == "yes" && c("CDR1.Seq", "CDR2.Seq", "CDR3.Seq", "FR1.IMGT", "FR2.IMGT", "FR3.IMGT") %in% names(inputdata)
66
67 if(filter_uniques){
66 68
67 clmns = names(inputdata) 69 clmns = names(inputdata)
68 70
69 inputdata$unique.def = paste(inputdata$CDR1.Seq, inputdata$CDR2.Seq, inputdata$CDR3.Seq, inputdata$FR1.IMGT, inputdata$FR2.IMGT, inputdata$FR3.IMGT) 71 inputdata$unique.def = paste(inputdata$CDR1.Seq, inputdata$CDR2.Seq, inputdata$CDR3.Seq, inputdata$FR1.IMGT, inputdata$FR2.IMGT, inputdata$FR3.IMGT)
70 inputdata.filtered = inputdata[duplicated(inputdata$unique.def),] 72 inputdata.filtered = inputdata[duplicated(inputdata$unique.def),]
175 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100) 177 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100)
176 178
177 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100) 179 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100)
178 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100) 180 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100)
179 181
180 inputdata.removed.s = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("Sample")] 182
181 183 if(filter_uniques){
182 sample_productive_count = merge(sample_productive_count, inputdata.removed.s, by="Sample") 184 inputdata.removed.s = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("Sample")]
183 185
184 sample_productive_count$perc_rem = round(sample_productive_count$UniqueRemoved / sample_productive_count$All * 100) 186 sample_productive_count = merge(sample_productive_count, inputdata.removed.s, by="Sample")
185 187
188 sample_productive_count$perc_rem = round(sample_productive_count$UniqueRemoved / sample_productive_count$All * 100)
189 } else {
190 sample_productive_count$UniqueRemoved = 0
191 sample_productive_count$perc_rem = 0
192 }
186 193
187 sample_replicate_productive_count = inputdata.dt[, list(All=.N, 194 sample_replicate_productive_count = inputdata.dt[, list(All=.N,
188 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), 195 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]),
189 perc_prod = 1, 196 perc_prod = 1,
190 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), 197 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]),
199 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100) 206 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100)
200 207
201 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100) 208 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100)
202 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100) 209 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100)
203 210
204 inputdata.removed.sr = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("samples_replicates")] 211
205 212 if(filter_uniques){
206 sample_replicate_productive_count = merge(sample_replicate_productive_count, inputdata.removed.sr, by="samples_replicates") 213 inputdata.removed.sr = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("samples_replicates")]
207 214
208 sample_replicate_productive_count$perc_rem = round(sample_replicate_productive_count$UniqueRemoved / sample_productive_count$All * 100) 215 sample_replicate_productive_count = merge(sample_replicate_productive_count, inputdata.removed.sr, by="samples_replicates")
209 216
217 sample_replicate_productive_count$perc_rem = round(sample_replicate_productive_count$UniqueRemoved / sample_productive_count$All * 100)
218 } else {
219 sample_replicate_productive_count$UniqueRemoved = 0
220 sample_replicate_productive_count$perc_rem = 0
221 }
210 222
211 setnames(sample_replicate_productive_count, colnames(sample_productive_count)) 223 setnames(sample_replicate_productive_count, colnames(sample_productive_count))
212 224
213 counts = rbind(sample_replicate_productive_count, sample_productive_count) 225 counts = rbind(sample_replicate_productive_count, sample_productive_count)
214 counts = counts[order(counts$Sample),] 226 counts = counts[order(counts$Sample),]