changeset 47:d97e1421aa86 draft

Uploaded
author davidvanzessen
date Wed, 27 Jan 2016 10:25:43 -0500
parents fee06348bfad
children d08dfc8d5225
files report_clonality/RScript.r report_clonality/r_wrapper.sh report_clonality_igg.xml
diffstat 3 files changed, 49 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/report_clonality/RScript.r	Wed Jan 27 08:39:16 2016 -0500
+++ b/report_clonality/RScript.r	Wed Jan 27 10:25:43 2016 -0500
@@ -41,6 +41,7 @@
 locus = args[6] # IGH, IGK, IGL, TRB, TRA, TRG or TRD
 filterproductive = ifelse(args[7] == "yes", T, F) #should unproductive sequences be filtered out? (yes/no)
 clonality_method = args[8]
+filter_uniques = args[9]
 
 # ---------------------- Data preperation ----------------------
 
@@ -58,6 +59,28 @@
 inputdata$Top.D.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.D.Gene)
 inputdata$Top.J.Gene = gsub("[*]([0-9]+)", "", inputdata$Top.J.Gene)
 
+#filter uniques
+inputdata.removed = inputdata[NULL,]
+
+if(filter_uniques == "yes" && c("CDR1.Seq", "CDR2.Seq", "CDR3.Seq", "FR1.IMGT", "FR2.IMGT", "FR3.IMGT") %in% names(inputdata)){
+	
+	clmns = names(inputdata)
+	
+	inputdata$unique.def = paste(inputdata$CDR1.Seq, inputdata$CDR2.Seq, inputdata$CDR3.Seq, inputdata$FR1.IMGT, inputdata$FR2.IMGT, inputdata$FR3.IMGT)
+	inputdata.filtered = inputdata[duplicated(inputdata$unique.def),]
+	fltr = inputdata$unique.def %in% inputdata.filtered$unique.def
+	
+	inputdata.removed = inputdata[!fltr,]
+	inputdata.removed$samples_replicates = paste(inputdata.removed$Sample, inputdata.removed$Replicate, sep="_")
+	
+	inputdata = inputdata[fltr,]
+	
+	inputdata = inputdata[,clmns]
+	
+	write.table(inputdata.removed, "unique_removed.csv", sep=",",quote=F,row.names=F,col.names=T)
+}
+
+
 inputdata$clonaltype = 1:nrow(inputdata)
 
 PRODF = inputdata
@@ -154,6 +177,12 @@
 sample_productive_count$perc_unprod = round(sample_productive_count$Unproductive / sample_productive_count$All * 100)
 sample_productive_count$perc_unprod_un = round(sample_productive_count$Unproductive_unique / sample_productive_count$All * 100)
 
+inputdata.removed.s = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("Sample")]
+
+sample_productive_count = merge(sample_productive_count, inputdata.removed.s, by="Sample")
+
+sample_productive_count$perc_rem = round(sample_productive_count$UniqueRemoved / sample_productive_count$All * 100)
+
 
 sample_replicate_productive_count = inputdata.dt[, list(All=.N, 
                                                         Productive = nrow(.SD[.SD$Functionality == "productive" | .SD$Functionality == "productive (see comment)",]), 
@@ -172,6 +201,13 @@
 sample_replicate_productive_count$perc_unprod = round(sample_replicate_productive_count$Unproductive / sample_replicate_productive_count$All * 100)
 sample_replicate_productive_count$perc_unprod_un = round(sample_replicate_productive_count$Unproductive_unique / sample_replicate_productive_count$All * 100)
 
+inputdata.removed.sr = data.table(inputdata.removed)[, list(UniqueRemoved=.N), by=c("samples_replicates")]
+
+sample_replicate_productive_count = merge(sample_replicate_productive_count, inputdata.removed.sr, by="samples_replicates")
+
+sample_replicate_productive_count$perc_rem = round(sample_replicate_productive_count$UniqueRemoved / sample_productive_count$All * 100)
+
+
 setnames(sample_replicate_productive_count, colnames(sample_productive_count))
 
 counts = rbind(sample_replicate_productive_count, sample_productive_count)
--- a/report_clonality/r_wrapper.sh	Wed Jan 27 08:39:16 2016 -0500
+++ b/report_clonality/r_wrapper.sh	Wed Jan 27 10:25:43 2016 -0500
@@ -8,6 +8,7 @@
 locus=$6
 filterproductive=$7
 clonality_method=$8
+filter_uniques=$9
 dir="$(cd "$(dirname "$0")" && pwd)"
 useD="false"
 if grep -q "$species.*${locus}D" "$dir/genes.txt" ; then
@@ -25,7 +26,7 @@
 fi
 mkdir $3
 cp $dir/genes.txt $outputDir
-Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive ${clonality_method} 2>&1
+Rscript --verbose $dir/RScript.r $inputFile $outputDir $outputDir $clonalType "$species" "$locus" $filterproductive ${clonality_method} ${filter_uniques} 2>&1
 cp $dir/tabber.js $outputDir
 cp $dir/style.css $outputDir
 cp $dir/script.js $outputDir
@@ -34,11 +35,12 @@
 
 echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > $2
 echo "<table border = 1>" >> $2
-echo "<thead><tr><th>Sample/Replicate</th><th>All</th><th>Productive</th><th>Unique Productive</th><th>Unproductive</th><th>Unique Unproductive</th></tr></thead>" >> $2
-while IFS=, read sample all productive perc_prod productive_unique perc_prod_un unproductive perc_unprod unproductive_unique perc_unprod_un
+echo "<thead><tr><th>Sample/Replicate</th><th>All</th><th>Removed</th><th>Productive</th><th>Unique Productive</th><th>Unproductive</th><th>Unique Unproductive</th></tr></thead>" >> $2
+while IFS=, read sample all productive perc_prod productive_unique perc_prod_un unproductive perc_unprod unproductive_unique perc_unprod_un removed perc_rem
 	do
 		echo "<tr><td>$sample</td>" >> $2
 		echo "<td>$all</td>" >> $2
+		echo "<td>${removed} (${perc_rem}%)</td>" >> $2
 		echo "<td>$productive (${perc_prod}%)</td>" >> $2
 		echo "<td>$productive_unique (${perc_prod_un}%)</td>" >> $2
 		echo "<td>$unproductive (${perc_unprod}%)</td>" >> $2
--- a/report_clonality_igg.xml	Wed Jan 27 08:39:16 2016 -0500
+++ b/report_clonality_igg.xml	Wed Jan 27 10:25:43 2016 -0500
@@ -2,15 +2,15 @@
 	<description> </description>
 	<command interpreter="bash">
 #if $gene_selection.source == "imgtdb"		
-	report_clonality/r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive $clonality_method
+	report_clonality/r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "${gene_selection.species}" "${gene_selection.locus}" $filterproductive $clonality_method $filter_uniques
 #else
-	report_clonality/r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive $clonality_method
+	report_clonality/r_wrapper.sh $in_file $out_file $out_file.files_path "$clonaltype" "custom" "${gene_selection.vgenes};${gene_selection.dgenes};${gene_selection.jgenes}" $filterproductive $clonality_method $filter_uniques
 #end if
 	</command>
 	<inputs>
 		<param name="in_file" format="tabular" type="data" label="Data to Process" />
 		<param name="clonaltype" type="select" label="Clonal Type Definition (Needed for clonality calculation)">
-			<option value="none">Dont remove duplicates based on clonaltype</option>
+			<option value="none">Don't remove duplicates based on clonaltype</option>
 			<option value="Top.V.Gene,CDR3.Seq">Top.V.Gene, CDR3.Seq</option>
 			<option value="Top.V.Gene,CDR3.Seq.DNA">Top.V.Gene, CDR3.Seq.DNA</option>
 			<option value="Top.V.Gene,Top.J.Gene,CDR3.Seq">Top.V.Gene, Top.J.Gene, CDR3.Seq</option>
@@ -95,6 +95,11 @@
 			<option value="boyd">R Package</option>
 		</param>
 		
+		<param name="filter_uniques" type="select" label="Filter unique sequences" help="Filter out the sequences (based on CDR1, FR2, CDR2, FR3 and CDR3) that only occur once.">
+			<option value="yes">Yes</option>
+			<option value="no">No</option>
+		</param>
+		
 	</inputs>
 	<outputs>
 		<data format="html" name="out_file" />