comparison report_clonality/RScript.r @ 47:d97e1421aa86 draft

Uploaded
author davidvanzessen
date Wed, 27 Jan 2016 10:25:43 -0500
parents 2e0a7c35082e
children d08dfc8d5225
comparison
equal deleted inserted replaced
46:fee06348bfad 47:d97e1421aa86
39 ct = unlist(strsplit(clonaltype, ",")) 39 ct = unlist(strsplit(clonaltype, ","))
40 species = args[5] #human or mouse 40 species = args[5] #human or mouse
41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD 41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD
42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) 42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no)
43 clonality_method = args[8] 43 clonality_method = args[8]
44 filter_uniques = args[9]
44 45
45 # ---------------------- Data preperation ---------------------- 46 # ---------------------- Data preperation ----------------------
46 47
47 print("Report Clonality - Data preperation") 48 print("Report Clonality - Data preperation")
48 49
55 56
56 #remove the allele from the V,D and J genes 57 #remove the allele from the V,D and J genes
57 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) 58 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene)
58 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) 59 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene)
59 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) 60 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene)
61
62 #filter uniques
63 inputdata.removed = inputdata[NULL,]
64
65 if(filter_uniques == "yes" && c("CDR1.Seq", "CDR2.Seq", "CDR3.Seq", "FR1.IMGT", "FR2.IMGT", "FR3.IMGT") %in% names(inputdata)){
66
67 clmns = names(inputdata)
68
69 inputdata$unique.def = paste(inputdata$CDR1.Seq, inputdata$CDR2.Seq, inputdata$CDR3.Seq, inputdata$FR1.IMGT, inputdata$FR2.IMGT, inputdata$FR3.IMGT)
70 inputdata.filtered = inputdata[duplicated(inputdata$unique.def),]
71 fltr = inputdata$unique.def %in% inputdata.filtered$unique.def
72
73 inputdata.removed = inputdata[!fltr,]
74 inputdata.removed$samples_replicates = paste(inputdata.removed$Sample, inputdata.removed$Replicate, sep="_")
75
76 inputdata = inputdata[fltr,]
77
78 inputdata = inputdata[,clmns]
79
80 write.table(inputdata.removed, "unique_removed.csv", sep=",",quote=F,row.names=F,col.names=T)
81 }
82
60 83
61 inputdata$clonaltype = 1:nrow(inputdata) 84 inputdata$clonaltype = 1:nrow(inputdata)
62 85
63 PRODF = inputdata 86 PRODF = inputdata
64 UNPROD = inputdata 87 UNPROD = inputdata
152 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100) 175 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100)
153 176
154 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100) 177 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100)
155 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100) 178 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100)
156 179
180 inputdata.removed.s = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("Sample")]
181
182 sample_productive_count = merge(sample_productive_count, inputdata.removed.s, by="Sample")
183
184 sample_productive_count$perc_rem = round(sample_productive_count$UniqueRemoved / sample_productive_count$All * 100)
185
157 186
158 sample_replicate_productive_count = inputdata.dt[, list(All=.N, 187 sample_replicate_productive_count = inputdata.dt[, list(All=.N,
159 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), 188 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]),
160 perc_prod = 1, 189 perc_prod = 1,
161 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), 190 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]),
169 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100) 198 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100)
170 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100) 199 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100)
171 200
172 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100) 201 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100)
173 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100) 202 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100)
203
204 inputdata.removed.sr = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("samples_replicates")]
205
206 sample_replicate_productive_count = merge(sample_replicate_productive_count, inputdata.removed.sr, by="samples_replicates")
207
208 sample_replicate_productive_count$perc_rem = round(sample_replicate_productive_count$UniqueRemoved / sample_productive_count$All * 100)
209
174 210
175 setnames(sample_replicate_productive_count, colnames(sample_productive_count)) 211 setnames(sample_replicate_productive_count, colnames(sample_productive_count))
176 212
177 counts = rbind(sample_replicate_productive_count, sample_productive_count) 213 counts = rbind(sample_replicate_productive_count, sample_productive_count)
178 counts = counts[order(counts$Sample),] 214 counts = counts[order(counts$Sample),]