Mercurial > repos > davidvanzessen > report_clonality_igg
comparison RScript.r @ 15:c6d0ee9b3d91 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 04 Dec 2014 10:53:23 -0500 |
parents | 8002401b83c4 |
children | ff84987df4f8 |
comparison
equal
deleted
inserted
replaced
14:8002401b83c4 | 15:c6d0ee9b3d91 |
---|---|
84 sampleFile <- file("samples.txt") | 84 sampleFile <- file("samples.txt") |
85 un = unique(inputdata$Sample) | 85 un = unique(inputdata$Sample) |
86 un = paste(un, sep="\n") | 86 un = paste(un, sep="\n") |
87 writeLines(un, sampleFile) | 87 writeLines(un, sampleFile) |
88 close(sampleFile) | 88 close(sampleFile) |
89 | |
90 # ---------------------- Counting the productive/unproductive and unique sequences ---------------------- | |
91 | |
92 inputdata.dt = data.table(inputdata) #for speed | |
93 | |
94 ct = unlist(strsplit(clonaltype, ",")) | |
95 if(clonaltype == "none"){ | |
96 ct = c("ID") | |
97 } | |
98 | |
99 inputdata.dt$samples_replicates = paste(inputdata.dt$Sample, inputdata.dt$Replicate, sep="_") | |
100 samples_replicates = c(unique(inputdata.dt$samples_replicates), unique(as.character(inputdata.dt$Sample))) | |
101 frequency_table = data.frame(ID = samples_replicates[order(samples_replicates)]) | |
102 | |
103 | |
104 sample_productive_count = inputdata.dt[, list(All=.N, | |
105 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), | |
106 perc_prod = 1, | |
107 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), | |
108 perc_prod_un = 1, | |
109 Unproductive= nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",]), | |
110 perc_unprod = 1, | |
111 Unproductive_unique =nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",list(count=.N),by=ct]), | |
112 perc_unprod_un = 1), | |
113 by=c("Sample")] | |
114 | |
115 sample_productive_count$perc_prod = round(sample_productive_count$Productive / sample_productive_count$All * 100) | |
116 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100) | |
117 | |
118 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100) | |
119 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100) | |
120 | |
121 | |
122 sample_replicate_productive_count = inputdata.dt[, list(All=.N, | |
123 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), | |
124 perc_prod = 1, | |
125 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]), | |
126 perc_prod_un = 1, | |
127 Unproductive= nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",]), | |
128 perc_unprod = 1, | |
129 Unproductive_unique =nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",list(count=.N),by=ct]), | |
130 perc_unprod_un = 1), | |
131 by=c("samples_replicates")] | |
132 | |
133 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100) | |
134 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100) | |
135 | |
136 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100) | |
137 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100) | |
138 | |
139 setnames(sample_replicate_productive_count, colnames(sample_productive_count)) | |
140 | |
141 counts = rbind(sample_replicate_productive_count, sample_productive_count) | |
142 counts = counts[order(counts$Sample),] | |
143 | |
144 write.table(x=counts, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F) | |
89 | 145 |
90 # ---------------------- Frequency calculation for V, D and J ---------------------- | 146 # ---------------------- Frequency calculation for V, D and J ---------------------- |
91 | 147 |
92 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")]) | 148 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")]) |
93 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) | 149 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) |