Mercurial > repos > davidvanzessen > argalaxy_tools
comparison report_clonality/RScript.r @ 47:d97e1421aa86 draft
Uploaded
author | davidvanzessen |
---|---|
date | Wed, 27 Jan 2016 10:25:43 -0500 |
parents | 2e0a7c35082e |
children | d08dfc8d5225 |
comparison
equal
deleted
inserted
replaced
46:fee06348bfad | 47:d97e1421aa86 |
---|---|
39 ct = unlist(strsplit(clonaltype, ",")) | 39 ct = unlist(strsplit(clonaltype, ",")) |
40 species = args[5] #human or mouse | 40 species = args[5] #human or mouse |
41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD | 41 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD |
42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) | 42 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no) |
43 clonality_method = args[8] | 43 clonality_method = args[8] |
44 filter_uniques = args[9] | |
44 | 45 |
45 # ---------------------- Data preperation ---------------------- | 46 # ---------------------- Data preperation ---------------------- |
46 | 47 |
47 print("Report Clonality - Data preperation") | 48 print("Report Clonality - Data preperation") |
48 | 49 |
55 | 56 |
56 #remove the allele from the V,D and J genes | 57 #remove the allele from the V,D and J genes |
57 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) | 58 inputdata$Top.V.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.V.Gene) |
58 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) | 59 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene) |
59 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) | 60 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene) |
61 | |
62 #filter uniques | |
63 inputdata.removed = inputdata[NULL,] | |
64 | |
65 if(filter_uniques == "yes" && c("CDR1.Seq", "CDR2.Seq", "CDR3.Seq", "FR1.IMGT", "FR2.IMGT", "FR3.IMGT") %in% names(inputdata)){ | |
66 | |
67 clmns = names(inputdata) | |
68 | |
69 inputdata$unique.def = paste(inputdata$CDR1.Seq, inputdata$CDR2.Seq, inputdata$CDR3.Seq, inputdata$FR1.IMGT, inputdata$FR2.IMGT, inputdata$FR3.IMGT) | |
70 inputdata.filtered = inputdata[duplicated(inputdata$unique.def),] | |
71 fltr = inputdata$unique.def %in% inputdata.filtered$unique.def | |
72 | |
73 inputdata.removed = inputdata[!fltr,] | |
74 inputdata.removed$samples_replicates = paste(inputdata.removed$Sample, inputdata.removed$Replicate, sep="_") | |
75 | |
76 inputdata = inputdata[fltr,] | |
77 | |
78 inputdata = inputdata[,clmns] | |
79 | |
80 write.table(inputdata.removed, "unique_removed.csv", sep=",",quote=F,row.names=F,col.names=T) | |
81 } | |
82 | |
60 | 83 |
61 inputdata$clonaltype = 1:nrow(inputdata) | 84 inputdata$clonaltype = 1:nrow(inputdata) |
62 | 85 |
63 PRODF = inputdata | 86 PRODF = inputdata |
64 UNPROD = inputdata | 87 UNPROD = inputdata |
152 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100) | 175 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100) |
153 | 176 |
154 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100) | 177 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100) |
155 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100) | 178 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100) |
156 | 179 |
180 inputdata.removed.s = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("Sample")] | |
181 | |
182 sample_productive_count = merge(sample_productive_count, inputdata.removed.s, by="Sample") | |
183 | |
184 sample_productive_count$perc_rem = round(sample_productive_count$UniqueRemoved / sample_productive_count$All * 100) | |
185 | |
157 | 186 |
158 sample_replicate_productive_count = inputdata.dt[, list(All=.N, | 187 sample_replicate_productive_count = inputdata.dt[, list(All=.N, |
159 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), | 188 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), |
160 perc_prod = 1, | 189 perc_prod = 1, |
161 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), | 190 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), |
169 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100) | 198 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100) |
170 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100) | 199 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100) |
171 | 200 |
172 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100) | 201 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100) |
173 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100) | 202 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100) |
203 | |
204 inputdata.removed.sr = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("samples_replicates")] | |
205 | |
206 sample_replicate_productive_count = merge(sample_replicate_productive_count, inputdata.removed.sr, by="samples_replicates") | |
207 | |
208 sample_replicate_productive_count$perc_rem = round(sample_replicate_productive_count$UniqueRemoved / sample_productive_count$All * 100) | |
209 | |
174 | 210 |
175 setnames(sample_replicate_productive_count, colnames(sample_productive_count)) | 211 setnames(sample_replicate_productive_count, colnames(sample_productive_count)) |
176 | 212 |
177 counts = rbind(sample_replicate_productive_count, sample_productive_count) | 213 counts = rbind(sample_replicate_productive_count, sample_productive_count) |
178 counts = counts[order(counts$Sample),] | 214 counts = counts[order(counts$Sample),] |