Mercurial > repos > davidvanzessen > report_clonality_igg

--- a/RScript.r	Fri Nov 14 04:15:43 2014 -0500
+++ b/RScript.r	Thu Dec 04 10:53:23 2014 -0500
@@ -87,6 +87,55 @@
 writeLines(un, sampleFile)
 close(sampleFile)

+# ---------------------- Counting the productive/unproductive and unique sequences ----------------------
+
+inputdata.dt = data.table(inputdata) #for speed
+
+# Columns whose distinct combinations define a unique sequence; "none" falls back to the ID column.
+ct = unlist(strsplit(clonaltype, ","))
+if(clonaltype == "none"){
+	ct = c("ID")
+}
+
+inputdata.dt$samples_replicates = paste(inputdata.dt$Sample, inputdata.dt$Replicate, sep="_")
+samples_replicates = c(unique(inputdata.dt$samples_replicates), unique(as.character(inputdata.dt$Sample)))
+frequency_table = data.frame(ID = samples_replicates[order(samples_replicates)])
+
+# Tally rows per value of group_col in dt (a data.table with a Functionality column).
+# clone_cols is a character vector of column names; the *_unique counts are the number of
+# distinct clone_cols combinations among the productive / unproductive rows of each group.
+# Returns one row per group; every perc_* column is a rounded percentage of that group's All.
+count_productivity = function(dt, group_col, clone_cols){
+	# perc_* start as placeholders so the columns keep this order; they are filled in below.
+	res = dt[, list(All=.N,
+	                Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]),
+	                perc_prod = 1,
+	                Productive_unique = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",list(count=.N),by=clone_cols]),
+	                perc_prod_un = 1,
+	                Unproductive= nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",]),
+	                perc_unprod = 1,
+	                Unproductive_unique =nrow(.SD[.SD$Functionality != "productive" & .SD$Functionality != "productive (see comment)",list(count=.N),by=clone_cols]),
+	                perc_unprod_un = 1),
+	         by=group_col]
+	res$perc_prod = round(res$Productive / res$All * 100)
+	res$perc_prod_un = round(res$Productive_unique / res$All * 100)
+	res$perc_unprod = round(res$Unproductive / res$All * 100)
+	res$perc_unprod_un = round(res$Unproductive_unique / res$All * 100)
+	res
+}
+
+sample_productive_count = count_productivity(inputdata.dt, "Sample", ct)
+sample_replicate_productive_count = count_productivity(inputdata.dt, "samples_replicates", ct)
+
+# Rename the grouping column to match so the two tables can be stacked.
+setnames(sample_replicate_productive_count, colnames(sample_productive_count))
+
+counts = rbind(sample_replicate_productive_count, sample_productive_count)
+counts = counts[order(counts$Sample),]
+
+# Written without a header row; r_wrapper.sh reads these columns by position.
+write.table(x=counts, file="productive_counting.txt", sep=",",quote=FALSE,row.names=FALSE,col.names=FALSE)
+
 # ---------------------- Frequency calculation for V, D and J ----------------------

 PRODFV = data.frame(data.table(PRODF)[, list(Length=sum(freq)), by=c("Sample", "Top.V.Gene")])
--- a/r_wrapper.sh	Fri Nov 14 04:15:43 2014 -0500
+++ b/r_wrapper.sh	Thu Dec 04 10:53:23 2014 -0500
@@ -19,7 +19,25 @@
 cp $dir/script.js $outputDir
 cp $dir/jquery-1.11.0.min.js $outputDir
 samples=`cat $outputDir/samples.txt`
-echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)</center></html>" > $2
+# Landing page ($2): link to the full report followed by a table of productive/unproductive counts.
+echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > "$2"
+{
+	echo "<table border = 1>"
+	echo "<thead><tr><th>Sample/Replicate</th><th>All</th><th>Productive</th><th>Unique Productive</th><th>Unproductive</th><th>Unique Unproductive</th></tr></thead>"
+	# One comma-separated row per sample or sample_replicate; -r keeps backslashes in names intact.
+	while IFS=, read -r sample all productive perc_prod productive_unique perc_prod_un unproductive perc_unprod unproductive_unique perc_unprod_un
+	do
+		echo "<tr><td>$sample</td>"
+		echo "<td>$all</td>"
+		echo "<td>$productive (${perc_prod}%)</td>"
+		echo "<td>$productive_unique (${perc_prod_un}%)</td>"
+		echo "<td>$unproductive (${perc_unprod}%)</td>"
+		echo "<td>$unproductive_unique (${perc_unprod_un}%)</td></tr>"
+	done < "$outputDir/productive_counting.txt"
+	echo "</table></center></html>"
+} >> "$2"
+
+echo "productive_counting.txt"
 echo "<html><head><title>Report on:" >> $outputFile
 for sample in $samples; do
 	echo " $sample" >> $outputFile