comparison RScript.r @ 15:c6d0ee9b3d91 draft

Uploaded
author davidvanzessen
date Thu, 04 Dec 2014 10:53:23 -0500
parents 8002401b83c4
children ff84987df4f8
comparison
equal deleted inserted replaced
14:8002401b83c4 15:c6d0ee9b3d91
84 sampleFile <- file("samples.txt") 84 sampleFile <- file("samples.txt")
85 un = unique(inputdata$Sample) 85 un = unique(inputdata$Sample)
86 un = paste(un, sep="\n") 86 un = paste(un, sep="\n")
87 writeLines(un, sampleFile) 87 writeLines(un, sampleFile)
88 close(sampleFile) 88 close(sampleFile)
89
90 # ---------------------- Counting the productive/unproductive and unique sequences ----------------------
91
92 inputdata.dt = data.table(inputdata) #for speed
93
94 ct = unlist(strsplit(clonaltype, ","))
95 if(clonaltype == "none"){
96 ct = c("ID")
97 }
98
99 inputdata.dt$samples_replicates = paste(inputdata.dt$Sample, inputdata.dt$Replicate, sep="_")
100 samples_replicates = c(unique(inputdata.dt$samples_replicates), unique(as.character(inputdata.dt$Sample)))
101 frequency_table = data.frame(ID = samples_replicates[order(samples_replicates)])
102
103
104 sample_productive_count = inputdata.dt[, list(All=.N,
105 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]),
106 perc_prod = 1,
107 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]),
108 perc_prod_un = 1,
109 Unproductive= nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",]),
110 perc_unprod = 1,
111 Unproductive_unique =nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",list(count=.N),by=ct]),
112 perc_unprod_un = 1),
113 by=c("Sample")]
114
115 sample_productive_count$perc_prod = round(sample_productive_count$Productive / sample_productive_count$All * 100)
116 sample_productive_count$perc_prod_un = round(sample_productive_count$Productive_unique / sample_productive_count$All * 100)
117
118 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100)
119 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100)
120
121
122 sample_replicate_productive_count = inputdata.dt[, list(All=.N,
123 Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]),
124 perc_prod = 1,
125 Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=ct]),
126 perc_prod_un = 1,
127 Unproductive= nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",]),
128 perc_unprod = 1,
129 Unproductive_unique =nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",list(count=.N),by=ct]),
130 perc_unprod_un = 1),
131 by=c("samples_replicates")]
132
133 sample_replicate_productive_count$perc_prod = round(sample_replicate_productive_count$Productive / sample_replicate_productive_count$All * 100)
134 sample_replicate_productive_count$perc_prod_un = round(sample_replicate_productive_count$Productive_unique / sample_replicate_productive_count$All * 100)
135
136 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100)
137 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100)
138
139 setnames(sample_replicate_productive_count, colnames(sample_productive_count))
140
141 counts = rbind(sample_replicate_productive_count, sample_productive_count)
142 counts = counts[order(counts$Sample),]
143
144 write.table(x=counts, file="productive_counting.txt", sep=",",quote=F,row.names=F,col.names=F)
89 145
90 # ---------------------- Frequency calculation for V, D and J ---------------------- 146 # ---------------------- Frequency calculation for V, D and J ----------------------
91 147
92 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")]) 148 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")])
93 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length))) 149 Total = ddply(PRODFV, .(Sample), function(x) data.frame(Total = sum(x$Length)))