# HG changeset patch
# User davidvanzessen
# Date 1453470329 18000
# Node ID d7580295e13b6eb3a6ab6b2c950e7a995bac25e8
# Parent f2010de70741ebe56a2e88ea15d303b2a40b4fee
Uploaded
diff -r f2010de70741 -r d7580295e13b imgt_loader/imgt_loader.r
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/imgt_loader/imgt_loader.r Fri Jan 22 08:45:29 2016 -0500
@@ -0,0 +1,110 @@
+# Merge the IMGT summary, amino-acid sequence and junction tables into one
+# tab-separated file.
+#
+# Usage: Rscript imgt_loader.r SUMM_FILE AA_FILE JUNCTION_FILE OUT_FILE
+#
+# Columns from the three inputs are combined by row position, so the inputs
+# must list the same sequences in the same order.
+
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 4) {
+  stop("Usage: imgt_loader.r SUMM_FILE AA_FILE JUNCTION_FILE OUT_FILE",
+       call. = FALSE)
+}
+
+summ.file <- args[1]
+aa.file <- args[2]
+junction.file <- args[3]
+out.file <- args[4]
+
+# Read one tab-separated IMGT table. quote = "" disables quote handling and
+# fill = TRUE pads rows that have fewer fields than the header.
+read_imgt <- function(path) {
+  read.table(path, sep = "\t", header = TRUE, quote = "", fill = TRUE)
+}
+
+# Reduce gene calls to the bare gene name: drop everything from the first "*"
+# and remove the "Homsap " prefix. Blank or missing calls become the literal
+# string "NA"; is.na() is tested so the logical index never contains NA.
+top_gene <- function(calls) {
+  genes <- gsub("Homsap ", "", gsub("\\*.*", "", calls))
+  genes[is.na(genes) | genes == ""] <- "NA"
+  genes
+}
+
+summ <- read_imgt(summ.file)
+aa <- read_imgt(aa.file)
+junction <- read_imgt(junction.file)
+
+# Refuse mismatched inputs instead of letting a shorter column be recycled.
+if (nrow(aa) != nrow(summ) || nrow(junction) != nrow(summ)) {
+  stop("Input tables differ in row count: summ=", nrow(summ),
+       ", aa=", nrow(aa), ", junction=", nrow(junction), call. = FALSE)
+}
+
+added_summary_columns <- c(
+  "Functionality", "V.REGION.identity..", "V.REGION.identity.nt",
+  "D.REGION.reading.frame", "AA.JUNCTION", "Functionality.comment", "Sequence"
+)
+added_sequence_columns <- c(
+  "FR1.IMGT", "FR2.IMGT", "FR3.IMGT", "CDR3.IMGT", "JUNCTION", "J.REGION",
+  "FR4.IMGT"
+)
+added_junction_columns <- c(
+  "P3.V.nt.nb", "N.REGION.nt.nb", "N1.REGION.nt.nb", "P5.D.nt.nb",
+  "P3.D.nt.nb", "N2.REGION.nt.nb", "P5.J.nt.nb", "X3.V.REGION.trimmed.nt.nb",
+  "X5.D.REGION.trimmed.nt.nb", "X3.D.REGION.trimmed.nt.nb",
+  "X5.J.REGION.trimmed.nt.nb", "N.REGION", "N1.REGION", "N2.REGION"
+)
+
+# Columns are appended in their final output order.
+out <- summ[, c("Sequence.ID", "JUNCTION.frame"), drop = FALSE]
+
+out[, "Top V Gene"] <- top_gene(summ[, "V.GENE.and.allele"])
+out[, "Top D Gene"] <- top_gene(summ[, "D.GENE.and.allele"])
+out[, "Top J Gene"] <- top_gene(summ[, "J.GENE.and.allele"])
+
+out[, "CDR1.Seq"] <- aa[, "CDR1.IMGT"]
+out[, "CDR1.Length"] <- summ[, "CDR1.IMGT.length"]
+
+out[, "CDR2.Seq"] <- aa[, "CDR2.IMGT"]
+out[, "CDR2.Length"] <- summ[, "CDR2.IMGT.length"]
+
+out[, "CDR3.Seq"] <- aa[, "CDR3.IMGT"]
+out[, "CDR3.Length"] <- summ[, "CDR3.IMGT.length"]
+
+out[, "CDR3.Seq.DNA"] <- junction[, "JUNCTION"]
+out[, "CDR3.Length.DNA"] <- nchar(as.character(junction[, "JUNCTION"]))
+out[, "Strand"] <- summ[, "Orientation"]
+out[, "CDR3.Found.How"] <- "a"
+
+out[, added_summary_columns] <- summ[, added_summary_columns]
+out[, added_sequence_columns] <- aa[, added_sequence_columns]
+out[, added_junction_columns] <- junction[, added_junction_columns]
+
+names(out) <- c(
+  "ID", "VDJ Frame", "Top V Gene", "Top D Gene", "Top J Gene",
+  "CDR1 Seq", "CDR1 Length", "CDR2 Seq", "CDR2 Length",
+  "CDR3 Seq", "CDR3 Length", "CDR3 Seq DNA", "CDR3 Length DNA",
+  "Strand", "CDR3 Found How",
+  "Functionality", "V-REGION identity %", "V-REGION identity nt",
+  "D-REGION reading frame", "AA JUNCTION", "Functionality comment", "Sequence",
+  "FR1-IMGT", "FR2-IMGT", "FR3-IMGT", "CDR3-IMGT", "JUNCTION", "J-REGION",
+  "FR4-IMGT",
+  "P3V-nt nb", "N-REGION-nt nb", "N1-REGION-nt nb", "P5D-nt nb", "P3D-nt nb",
+  "N2-REGION-nt nb", "P5J-nt nb", "3V-REGION trimmed-nt nb",
+  "5D-REGION trimmed-nt nb", "3D-REGION trimmed-nt nb",
+  "5J-REGION trimmed-nt nb", "N-REGION", "N1-REGION", "N2-REGION"
+)
+
+# Normalise the frame labels: "null", blank and missing values all count as
+# out-of-frame. NA is included because read.table returns NA rather than ""
+# when every field of the column is blank.
+frame <- as.character(out[, "VDJ Frame"])
+frame[frame %in% "in-frame"] <- "In-frame"
+out_of_frame <- is.na(frame) | frame %in% c("null", "out-of-frame", "")
+frame[out_of_frame] <- "Out-of-frame"
+out[, "VDJ Frame"] <- frame
+
+write.table(out, out.file, sep = "\t", quote = FALSE, row.names = FALSE,
+            col.names = TRUE)
diff -r f2010de70741 -r d7580295e13b imgt_loader/imgt_loader.sh --- a/imgt_loader/imgt_loader.sh Mon Jan 18 07:55:31 2016 -0500 +++ b/imgt_loader/imgt_loader.sh Fri Jan 22 08:45:29 2016 -0500 @@ -64,4 +64,6 @@ find $PWD/$name/files -iname "5_*" -exec cat {} + > $PWD/$name/aa.txt find $PWD/$name/files -iname "6_*" -exec cat {} + > $PWD/$name/junction.txt -python $dir/imgt_loader.py --summ $PWD/$name/summ.txt --aa $PWD/$name/aa.txt --junction $PWD/$name/junction.txt --output $output +#python $dir/imgt_loader.py --summ $PWD/$name/summ.txt --aa $PWD/$name/aa.txt --junction $PWD/$name/junction.txt --output $output + +Rscript --verbose $dir/imgt_loader.r $PWD/$name/summ.txt $PWD/$name/aa.txt $PWD/$name/junction.txt $output 2>&1
diff -r f2010de70741 -r d7580295e13b report_clonality/circos/circos.conf --- a/report_clonality/circos/circos.conf Mon Jan 18 07:55:31 2016 -0500 +++ b/report_clonality/circos/circos.conf Fri Jan 22 08:45:29 2016 -0500 @@ -12,22 +12,22 @@ # anchor links to segment labels, tick marks). -<> -<> -#<> +<> +<> +<> -<> +<> -<> -<> +<> +<> -karyotype = karyotype.txt +karyotype = DATA_DIR/karyotype.txt -dir = .
+dir = DATA_DIR file = circos.png 24bit = yes svg = no @@ -52,7 +52,7 @@ show = no -file = data/row.txt +file = DATA_DIR/row.txt r0 = 1r+200p r1 = 1r+220p stroke_color = black @@ -61,7 +61,7 @@ show = no -file = data/col.txt +file = DATA_DIR/col.txt r0 = 1r+230p r1 = 1r+250p stroke_color = black @@ -70,7 +70,7 @@ show = no -file = data/all.txt +file = DATA_DIR/all.txt r0 = 1r+10p r1 = 1r+35p stroke_color = black @@ -83,7 +83,7 @@ type = text -file = data/segmentlabel.txt +file = DATA_DIR/segmentlabel.txt label_font = condensedbold color = black label_size = 30p @@ -112,7 +112,7 @@ ribbon = yes flat = yes -file = data/cells.txt +file = DATA_DIR/cells.txt bezier_radius = 0.0r radius = 0.999r-15p thickness = 1 diff -r f2010de70741 -r d7580295e13b report_clonality/r_wrapper.sh --- a/report_clonality/r_wrapper.sh Mon Jan 18 07:55:31 2016 -0500 +++ b/report_clonality/r_wrapper.sh Fri Jan 22 08:45:29 2016 -0500 @@ -51,15 +51,31 @@ mkdir $outputDir/circos cp $dir/circos/* $outputDir/circos/ -for sample in $samples; do +sed -i "s%DATA_DIR%$outputDir/circos%" $outputDir/circos/circos.conf +for sample in $samples; do #output the samples to a file and create the circos plots with the R script output echo " $sample" >> $outputFile + circos_file="$outputDir/${sample}_VJ_circos.txt" + echo -e -n "labels$(cat ${circos_file})" > ${circos_file} + cat "${circos_file}" | $CIRCOSTOOLS/tableviewer/bin/parse-table -configfile $dir/circos/parse-table.conf 2>&1 | $CIRCOSTOOLS/tableviewer/bin/make-conf -dir $outputDir/circos/ + /home/galaxy/Downloads/circos-master/bin/circos -conf $outputDir/circos/circos.conf 2>&1 + mv $outputDir/circos/circos.png $outputDir/circosVJ_${sample}.png + + + if [[ "$useD" == "true" ]] ; then circos_file="$outputDir/${sample}_VD_circos.txt" echo -e -n "labels$(cat ${circos_file})" > ${circos_file} - #echo -n "labels" > $outputDir/tmp.txt - #cat ${circos_file} >> $outputDir/tmp.txt - #mv $outputDir/tmp.txt ${circos_file} - #cat "${circos_file}" | 
$CIRCOSTOOLS/tableviewer/bin/parse-table -configfile $dir/circos/parse-table.conf 2>&1 | $CIRCOSTOOLS/tableviewer/bin/make-conf -dir $outputDir/circos/; /home/galaxy/Downloads/circos-master/bin/circos -param karyotype=$outputDir/circos/karyotype.txt -conf $dir/circos/circos.conf 2>&1 + cat "${circos_file}" | $CIRCOSTOOLS/tableviewer/bin/parse-table -configfile $dir/circos/parse-table.conf 2>&1 | $CIRCOSTOOLS/tableviewer/bin/make-conf -dir $outputDir/circos/ + /home/galaxy/Downloads/circos-master/bin/circos -conf $outputDir/circos/circos.conf 2>&1 + mv $outputDir/circos/circos.png $outputDir/circosVD_${sample}.png + + circos_file="$outputDir/${sample}_DJ_circos.txt" + echo -e -n "labels$(cat ${circos_file})" > ${circos_file} + cat "${circos_file}" | $CIRCOSTOOLS/tableviewer/bin/parse-table -configfile $dir/circos/parse-table.conf 2>&1 | $CIRCOSTOOLS/tableviewer/bin/make-conf -dir $outputDir/circos/ + /home/galaxy/Downloads/circos-master/bin/circos -conf $outputDir/circos/circos.conf 2>&1 + mv $outputDir/circos/circos.png $outputDir/circosDJ_${sample}.png + + fi done echo "" >> $outputFile echo "" >> $outputFile @@ -80,6 +96,8 @@ echo "" >> $outputFile echo "" >> $outputFile +#Heatmaps + count=1 echo "
" >> $outputFile for sample in $samples; do @@ -96,6 +114,23 @@ done echo "
" >> $outputFile +#circos + +echo "
" >> $outputFile +for sample in $samples; do + echo "
" >> $outputFile + if [[ "$useD" == "true" ]] ; then + echo "
" >> $outputFile + fi + echo "" >> $outputFile + if [[ "$useD" == "true" ]] ; then + echo "" >> $outputFile + fi + echo "
V-D
V-J
D-J
" >> $outputFile + count=$((count+1)) +done +echo "
" >> $outputFile + #echo "
" >> $outputFile hasReplicateColumn="$(if head -n 1 $inputFile | grep -q 'Replicate'; then echo 'Yes'; else echo 'No'; fi)"