changeset 35:d7580295e13b draft

Uploaded
author davidvanzessen
date Fri, 22 Jan 2016 08:45:29 -0500
parents f2010de70741
children fd55088e48d2
files imgt_loader/imgt_loader.r imgt_loader/imgt_loader.sh report_clonality/circos/circos.conf report_clonality/r_wrapper.sh
diffstat 4 files changed, 136 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/imgt_loader/imgt_loader.r	Fri Jan 22 08:45:29 2016 -0500
@@ -0,0 +1,80 @@
+args <- commandArgs(trailingOnly = TRUE)
+# Positional arguments, in order: summary ("summ") table, "aa" table, junction table, output file.
+summ.file = args[1]
+aa.file = args[2]
+junction.file = args[3]
+out.file = args[4]
+# All three inputs are read as tab-separated tables with a header row; quoting is disabled and short rows are padded (fill=T).
+summ = read.table(summ.file, sep="\t", header=T, quote="", fill=T)
+aa = read.table(aa.file, sep="\t", header=T, quote="", fill=T)
+junction = read.table(junction.file, sep="\t", header=T, quote="", fill=T)
+# NOTE(review): the three old_* vectors below are not referenced again in this script.
+old_summary_columns=c('Sequence.ID','JUNCTION.frame','V.GENE.and.allele','D.GENE.and.allele','J.GENE.and.allele','CDR1.IMGT.length','CDR2.IMGT.length','CDR3.IMGT.length','Orientation')
+old_sequence_columns=c('CDR1.IMGT','CDR2.IMGT','CDR3.IMGT')
+old_junction_columns=c('JUNCTION')
+# Extra columns copied unchanged from each input table into the output (used further down).
+added_summary_columns=c('Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence')
+added_sequence_columns=c('FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT')
+added_junction_columns=c("P3.V.nt.nb",'N.REGION.nt.nb','N1.REGION.nt.nb',"P5.D.nt.nb","P3.D.nt.nb",'N2.REGION.nt.nb',"P5.J.nt.nb","X3.V.REGION.trimmed.nt.nb","X5.D.REGION.trimmed.nt.nb","X3.D.REGION.trimmed.nt.nb","X5.J.REGION.trimmed.nt.nb","N.REGION","N1.REGION","N2.REGION")
+# Start the output from the identifying columns of the summary table.
+out=summ[,c("Sequence.ID","JUNCTION.frame","V.GENE.and.allele","D.GENE.and.allele","J.GENE.and.allele")]
+# Columns from aa and junction are attached by row position (no key join), so all three tables are assumed to list the same sequences in the same order -- TODO confirm.
+out[,"CDR1.Seq"] = aa[,"CDR1.IMGT"]
+out[,"CDR1.Length"] = summ[,"CDR1.IMGT.length"]
+
+out[,"CDR2.Seq"] = aa[,"CDR2.IMGT"]
+out[,"CDR2.Length"] = summ[,"CDR2.IMGT.length"]
+
+out[,"CDR3.Seq"] = aa[,"CDR3.IMGT"]
+out[,"CDR3.Length"] = summ[,"CDR3.IMGT.length"]
+# CDR3.Seq.DNA is the JUNCTION column of the junction table; CDR3.Length.DNA is the character count of that string.
+out[,"CDR3.Seq.DNA"] = junction[,"JUNCTION"]
+out[,"CDR3.Length.DNA"] = nchar(as.character(junction[,"JUNCTION"]))
+out[,"Strand"] = summ[,"Orientation"]
+out[,"CDR3.Found.How"] = "a"
+# "CDR3.Found.How" above is set to the same constant "a" for every row.
+out[,added_summary_columns] = summ[,added_summary_columns]
+
+out[,added_sequence_columns] = aa[,added_sequence_columns]
+
+out[,added_junction_columns] = junction[,added_junction_columns]
+# Gene names: drop everything from the first "*" onward, then remove every occurrence of "Homsap ".
+out[,"Top V Gene"] = gsub("Homsap ", "", gsub("\\*.*", "", summ[,"V.GENE.and.allele"]))
+out[,"Top D Gene"] = gsub("Homsap ", "", gsub("\\*.*", "", summ[,"D.GENE.and.allele"]))
+out[,"Top J Gene"] = gsub("Homsap ", "", gsub("\\*.*", "", summ[,"J.GENE.and.allele"]))
+# Select the output columns in their final order (this drops the three *.GENE.and.allele columns).
+out = out[,c('Sequence.ID','JUNCTION.frame','Top V Gene','Top D Gene','Top J Gene','CDR1.Seq','CDR1.Length','CDR2.Seq','CDR2.Length','CDR3.Seq','CDR3.Length','CDR3.Seq.DNA','CDR3.Length.DNA','Strand','CDR3.Found.How','Functionality','V.REGION.identity..','V.REGION.identity.nt','D.REGION.reading.frame','AA.JUNCTION','Functionality.comment','Sequence','FR1.IMGT','FR2.IMGT','FR3.IMGT','CDR3.IMGT','JUNCTION','J.REGION','FR4.IMGT','P3.V.nt.nb','N.REGION.nt.nb','N1.REGION.nt.nb','P5.D.nt.nb','P3.D.nt.nb','N2.REGION.nt.nb','P5.J.nt.nb','X3.V.REGION.trimmed.nt.nb','X5.D.REGION.trimmed.nt.nb','X3.D.REGION.trimmed.nt.nb','X5.J.REGION.trimmed.nt.nb','N.REGION','N1.REGION','N2.REGION')]
+# Rename the columns to the output header names; this list must stay aligned one-to-one with the selection above.
+names(out) = c('ID','VDJ Frame','Top V Gene','Top D Gene','Top J Gene','CDR1 Seq','CDR1 Length','CDR2 Seq','CDR2 Length','CDR3 Seq','CDR3 Length','CDR3 Seq DNA','CDR3 Length DNA','Strand','CDR3 Found How','Functionality','V-REGION identity %','V-REGION identity nt','D-REGION reading frame','AA JUNCTION','Functionality comment','Sequence','FR1-IMGT','FR2-IMGT','FR3-IMGT','CDR3-IMGT','JUNCTION','J-REGION','FR4-IMGT','P3V-nt nb','N-REGION-nt nb','N1-REGION-nt nb','P5D-nt nb','P3D-nt nb','N2-REGION-nt nb','P5J-nt nb','3V-REGION trimmed-nt nb','5D-REGION trimmed-nt nb','3D-REGION trimmed-nt nb','5J-REGION trimmed-nt nb','N-REGION','N1-REGION','N2-REGION')
+# Normalise "VDJ Frame": "in-frame" -> "In-frame"; "null", "out-of-frame" and "" -> "Out-of-frame".
+out[,"VDJ Frame"] = as.character(out[,"VDJ Frame"])
+
+fltr = out[,"VDJ Frame"] == "in-frame"
+if(any(fltr)){
+	out[fltr, "VDJ Frame"] = "In-frame"
+}
+
+fltr = out[,"VDJ Frame"] == "null"
+if(any(fltr)){
+	out[fltr, "VDJ Frame"] = "Out-of-frame"
+}
+
+fltr = out[,"VDJ Frame"] == "out-of-frame"
+if(any(fltr)){
+	out[fltr, "VDJ Frame"] = "Out-of-frame"
+}
+
+fltr = out[,"VDJ Frame"] == ""
+if(any(fltr)){
+	out[fltr, "VDJ Frame"] = "Out-of-frame"
+}
+# Empty gene names are replaced by the literal string "NA".
+for(col in c('Top V Gene','Top D Gene','Top J Gene')){
+	out[,col] = as.character(out[,col])
+	fltr = out[,col] == ""
+	if(any(fltr)){
+		out[fltr,col] = "NA"
+	}
+}
+# Write the result tab-separated, unquoted, with a header row and no row names.
+write.table(out, out.file, sep="\t", quote=F, row.names=F, col.names=T)
--- a/imgt_loader/imgt_loader.sh	Mon Jan 18 07:55:31 2016 -0500
+++ b/imgt_loader/imgt_loader.sh	Fri Jan 22 08:45:29 2016 -0500
@@ -64,4 +64,6 @@
 find $PWD/$name/files -iname "5_*" -exec cat {} + > $PWD/$name/aa.txt
 find $PWD/$name/files -iname "6_*" -exec cat {} + > $PWD/$name/junction.txt
 
-python $dir/imgt_loader.py --summ $PWD/$name/summ.txt --aa $PWD/$name/aa.txt --junction $PWD/$name/junction.txt --output $output
+#python $dir/imgt_loader.py --summ $PWD/$name/summ.txt --aa $PWD/$name/aa.txt --junction $PWD/$name/junction.txt --output $output
+# The Python loader above is disabled; the R loader takes summ, aa, junction and output as positional arguments. Paths are quoted so spaces or glob characters cannot split them.
+Rscript --verbose "$dir/imgt_loader.r" "$PWD/$name/summ.txt" "$PWD/$name/aa.txt" "$PWD/$name/junction.txt" "$output" 2>&1
--- a/report_clonality/circos/circos.conf	Mon Jan 18 07:55:31 2016 -0500
+++ b/report_clonality/circos/circos.conf	Fri Jan 22 08:45:29 2016 -0500
@@ -12,22 +12,22 @@
 # anchor links to segment labels, tick marks).
 
 <colors>
-<<include etc_colors.conf>>
-<<include colors.conf>>
-#<<include colors_percentile.conf>>
+<<include DATA_DIR/etc_colors.conf>>
+<<include DATA_DIR/colors.conf>>
+<<include DATA_DIR/colors_percentile.conf>>
 </colors>
 
 <fonts>
-<<include fonts.conf>>
+<<include DATA_DIR/fonts.conf>>
 </fonts>
 
-<<include ideogram.conf>>
-<<include ticks.conf>>
+<<include DATA_DIR/ideogram.conf>>
+<<include DATA_DIR/ticks.conf>>
 
-karyotype = karyotype.txt
+karyotype = DATA_DIR/karyotype.txt
 
 <image>
-dir   = .
+dir   = DATA_DIR
 file  = circos.png
 24bit = yes
 svg   = no
@@ -52,7 +52,7 @@
 
 <highlight>
 show = no
-file = data/row.txt
+file = DATA_DIR/row.txt
 r0 = 1r+200p
 r1 = 1r+220p
 stroke_color = black
@@ -61,7 +61,7 @@
 
 <highlight>
 show = no
-file = data/col.txt
+file = DATA_DIR/col.txt
 r0 = 1r+230p
 r1 = 1r+250p
 stroke_color = black
@@ -70,7 +70,7 @@
 
 <highlight>
 show = no
-file = data/all.txt
+file = DATA_DIR/all.txt
 r0 = 1r+10p
 r1 = 1r+35p
 stroke_color = black
@@ -83,7 +83,7 @@
 
 <plot>
 type = text
-file = data/segmentlabel.txt
+file = DATA_DIR/segmentlabel.txt
 label_font = condensedbold
 color = black
 label_size = 30p
@@ -112,7 +112,7 @@
 <link cellvalues>
 ribbon        = yes
 flat          = yes
-file          = data/cells.txt
+file          = DATA_DIR/cells.txt
 bezier_radius = 0.0r
 radius        = 0.999r-15p
 thickness     = 1
--- a/report_clonality/r_wrapper.sh	Mon Jan 18 07:55:31 2016 -0500
+++ b/report_clonality/r_wrapper.sh	Fri Jan 22 08:45:29 2016 -0500
@@ -51,15 +51,31 @@
 
 mkdir $outputDir/circos
 cp $dir/circos/* $outputDir/circos/
-for sample in $samples; do
+sed -i "s%DATA_DIR%$outputDir/circos%" $outputDir/circos/circos.conf # fill in the DATA_DIR placeholder of the copied config (sed replaces the first match on each line)
+for sample in $samples; do #output the samples to a file and create the circos plots with the R script output
 	echo " $sample" >> $outputFile
 	
+	circos_file="$outputDir/${sample}_VJ_circos.txt"
+	echo -e -n "labels$(cat ${circos_file})" > ${circos_file} # prepend "labels" to the table; the $(cat ...) is expanded before the redirection truncates the file
+	cat "${circos_file}" | $CIRCOSTOOLS/tableviewer/bin/parse-table -configfile $dir/circos/parse-table.conf 2>&1 | $CIRCOSTOOLS/tableviewer/bin/make-conf -dir $outputDir/circos/
+	/home/galaxy/Downloads/circos-master/bin/circos -conf $outputDir/circos/circos.conf 2>&1
+	mv $outputDir/circos/circos.png $outputDir/circosVJ_${sample}.png
+	# Every plot repeats the same steps: parse-table | make-conf (given $outputDir/circos/ as -dir), circos run on the shared circos.conf, then circos.png is moved to a per-sample name.
+	# The V-D and D-J plots below are produced only when useD is "true".
+	if [[ "$useD" == "true" ]] ; then
 		circos_file="$outputDir/${sample}_VD_circos.txt"
 		echo -e -n "labels$(cat ${circos_file})" > ${circos_file}
-		#echo -n "labels" > $outputDir/tmp.txt
-		#cat ${circos_file} >> $outputDir/tmp.txt
-		#mv $outputDir/tmp.txt ${circos_file}
-		#cat "${circos_file}" | $CIRCOSTOOLS/tableviewer/bin/parse-table -configfile $dir/circos/parse-table.conf 2>&1 | $CIRCOSTOOLS/tableviewer/bin/make-conf -dir $outputDir/circos/; /home/galaxy/Downloads/circos-master/bin/circos -param karyotype=$outputDir/circos/karyotype.txt -conf $dir/circos/circos.conf 2>&1
+		cat "${circos_file}" | $CIRCOSTOOLS/tableviewer/bin/parse-table -configfile $dir/circos/parse-table.conf 2>&1 | $CIRCOSTOOLS/tableviewer/bin/make-conf -dir $outputDir/circos/
+		/home/galaxy/Downloads/circos-master/bin/circos -conf $outputDir/circos/circos.conf 2>&1
+		mv $outputDir/circos/circos.png $outputDir/circosVD_${sample}.png
+		
+		circos_file="$outputDir/${sample}_DJ_circos.txt"
+		echo -e -n "labels$(cat ${circos_file})" > ${circos_file}
+		cat "${circos_file}" | $CIRCOSTOOLS/tableviewer/bin/parse-table -configfile $dir/circos/parse-table.conf 2>&1 | $CIRCOSTOOLS/tableviewer/bin/make-conf -dir $outputDir/circos/
+		/home/galaxy/Downloads/circos-master/bin/circos -conf $outputDir/circos/circos.conf 2>&1
+		mv $outputDir/circos/circos.png $outputDir/circosDJ_${sample}.png
+		
+	fi
 done
 echo "</title><script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> $outputFile
 echo "<script type='text/javascript' src='tabber.js'></script>" >> $outputFile
@@ -80,6 +96,8 @@
 echo "<img src='JPlot.png'/>" >> $outputFile
 echo "<img src='AAComposition.png'/></div>" >> $outputFile
 
+#Heatmaps
+
 count=1
 echo "<div class='tabbertab' title='Heatmaps'><div class='tabber'>" >> $outputFile
 for sample in $samples; do
@@ -96,6 +114,23 @@
 done
 echo "</div></div>" >> $outputFile
 
+# Circos tab: one sub-tab per sample with the V-J plot and, when useD is "true", the V-D and D-J plots around it.
+# The image names are the circosVJ_/circosVD_/circosDJ_<sample>.png files moved into $outputDir earlier in this script.
+echo "<div class='tabbertab' title='Circos'><div class='tabber'>" >> "$outputFile"
+for sample in $samples; do
+	echo "<div class='tabbertab' title='$sample'><table border='1'><center>" >> "$outputFile"
+	if [[ "$useD" == "true" ]] ; then
+		echo "<tr><td>V-D</td><td><img src='circosVD_${sample}.png' width='700' height='700'/></td></tr>" >> "$outputFile"
+	fi
+	echo "<tr><td>V-J</td><td><img src='circosVJ_${sample}.png' width='700' height='700'/></td></tr>" >> "$outputFile"
+	if [[ "$useD" == "true" ]] ; then
+		echo "<tr><td>D-J</td><td><img src='circosDJ_${sample}.png' width='700' height='700'/></td></tr>" >> "$outputFile"
+	fi
+	echo "</center></table></div>" >> "$outputFile" # close the <center> opened above (it was emitted as a second opening tag)
+	count=$((count+1)) # not read inside this loop; kept in case later code uses it -- TODO confirm
+done
+echo "</div></div>" >> "$outputFile"
+
 #echo "<div class='tabbertab' title='Interactive'><svg class='chart'></svg><script src='http://d3js.org/d3.v3.min.js'></script></div>" >> $outputFile
 
 hasReplicateColumn="$(if head -n 1 $inputFile | grep -q 'Replicate'; then echo 'Yes'; else echo 'No'; fi)"