Mercurial > repos > czouaoui > circoletto
changeset 8:b1dd300754cb draft default tip
Uploaded
author | czouaoui |
---|---|
date | Tue, 13 Mar 2018 11:31:50 -0400 |
parents | 99e2897b4ad0 |
children | |
files | circoletto.pl |
diffstat | 1 files changed, 1176 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/circoletto.pl Tue Mar 13 11:31:50 2018 -0400 @@ -0,0 +1,1176 @@ +#!/usr/bin/perl -w + +$version = localtime((stat($0))[9]); +$circos_compatibility = '0.69-3'; + +$| = 1; + +use POSIX; +use Getopt::Long; +use Bio::SearchIO; +use Bio::Seq; + +$randomer = int(rand(1001)) . $$; +$randomer = sprintf "%010s", $randomer; + +$usage = "# Circoletto - visualising sequence similarity with Circos +# Darzentas N. +# $version +# http://bat.infspire.org | bat\@infspire.org + +welcome + +before you run Circoletto, be sure to: +- have Circos (tested with $circos_compatibility, http://circos.ca/software/download/circos/), BLAST (tested with 2.2.25) in your path, and BioPerl (tested with 1.6.901) installed +- check / edit (in the code) the two paths to Circos and Circos tools - if we cannot find them, we'll print a warning and exit +- if you need to increase the max_sequences > 200, you also need to edit max_ideograms in Circos' housekeeping.conf + +circoletto.pl + + either +--query or --q (path to) the queries +--database or --db (path to) the database + or +--blastout or --bl (path to) the BLAST output + + other (optional) arguments +--out_dir output directory, otherwise pwd +--out_name output basename (extension will be added automatically) +--best_hit set to show only best hit per query +--best_hit_type best hit type, 'entry' to show all HSPs per best hit [default], or 'local' to show single best HSP +--w_hits set if you want to show only entries with BLAST hits +--z_by depth-order ribbons by 'score' (highest at top) [default] / 'score_rev' / 'alnlen' (longest at top) / 'alnlen_rev' +--e_value E-value [default: 1e-10] +--gep gap extension penalty, set to >=2 if you need to constrain it e.g. for genomic data [default: -1] +--flt set to enable pre-filtering of query sequences +--html_out set to provide BLAST HTML output (runs BLAST again...) +--no_labels set to switch off labels +--out_size set radius of output in pixels, so set to '1000' for a 2000x2000 output [default: 1000] +--out_type output type, either 'svg', or 'png' [default] +--score2colour score to colour ribbons with, 'bit' for bitscore [default], or 'eval' for E-value, or 'id' for % identity +--scoreratio2colour score ratio to use for colouring, 'max' for score/max [default], 'minmax' for (score-min)/(max-min) that should give more colour range esp. for % identity +--abscolour use absolute scores for colouring, currently only allowed with % identity +--maxB1 max score for blue +--maxG2 max score for green +--maxO3 max score for orange, then it's red +--annotation user provided annotation file, see 'example_annotation.txt' +--annocolour colour ribbons by 'query' or 'database' default ideogram colours or annotation (see --annotation), or by 'query_rainbow_(colour|grey)' or 'database_rainbow_(colour|grey)' +--invertcolour set to colour ribbons by SEQUENCE (i.e. not ORDER) invertion (or reverse complementarity or plus/minus), normal in black, inverted in lime +--untangling_off set to turn off ribbon untangling +--revcomp_q set to reverse complement query DNA sequences +--revcomp_d set to reverse complement database DNA sequences +--reverse_qorder set to reverse ORDER of query sequences, may help clarity +--reverse_dorder set to reverse ORDER of database sequences, may help clarity +--reverse_qorient set to reverse ORIENTATION of query sequences which then need to be read anticlockwisely, may help clarity +--reverse_dorient set to reverse ORIENTATION of database sequences which then need to be read anticlockwisely, may help clarity +--hide_orient_lights set to hide orientation lights at edges of ideograms, read from green (=beginning) to red (=end) +--ribocolours2allow blue, green, orange, red in a format like this (including parentheses) '(green|orange)' or '(blue)' - histograms are not affected +--tblastx run 6-frame tBLASTx for DNA vs DNA +--cpus number of CPUs to use with BLAST + +"; + +my $options_res = GetOptions( + "query|q:s" => \$query, + "database|db:s" => \$database, + "blastout|bl:s" => \$blastout, + "out_dir:s" => \$out_dir, + "out_name:s" => \$out_name, + "out_size:f" => \$out_size, + "out_type:s" => \$out_type, + "e_value|e-value|evalue:s" => \$e_value, + "gep:s" => \$gep, + "flt!" => \$flt, + "best_hit!" => \$best_hit, + "best_hit_type:s" => \$best_hit_type, + "z_by:s" => \$z_by, + "w_hits!" => \$w_hits, + "untangling_off|tangle!" => \$untangling_off, + "revcomp_q!" => \$revcomp_q, + "revcomp_d!" => \$revcomp_d, + "reverse_qorder!" => \$reverse_qorder, + "reverse_dorder!" => \$reverse_dorder, + "reverse_qorient!" => \$reverse_qorient, + "reverse_dorient!" => \$reverse_dorient, + "hide_orient_lights!" => \$hide_orient_lights, + "ribocolours2allow:s" => \$ribocolours2allow, + "annotation:s" => \$annotation, + "score2colour:s" => \$score2colour, + "scoreratio2colour:s" => \$scoreratio2colour, + "abscolour!" => \$abscolour, + "maxB1:s" => \$maxB1, + "maxG2:s" => \$maxG2, + "maxO3:s" => \$maxO3, + #"maxR4:s" => \$maxR4, + "annocolour|annotate_by:s" => \$annocolour, + "invertcolour!" => \$invertcolour, + "no_labels!" => \$no_labels, + "dirty_labels!" => \$dirty_labels, + "tblastx!" => \$tblastx, + "override!" => \$override, + "ltrphyler!" => \$ltrphyler, + "cpus:f" => \$cpus, + "mhits:f" => \$mhits, + "online!" => \$online + ); + + +############################################################# +#$path2circos = '/home/czouaoui/circos/circos-0.69-3'; # path to Circos +#$path2circostools = '/home/czouaoui/circos/circos-tools-0.22'; # path to Circos utilities +############################################################# +#if (-e "$path2circos/bin/circos") { $path2circos_ok = 1; } else { print "(!) path to Circos not OK\n"; } +#if (-e "$path2circostools/tools/orderchr/bin/orderchr") { $path2circostools_ok = 1; } else { print "(!) path to Circos tools not OK\n"; } +#unless ($path2circos_ok && $path2circostools_ok) { print "(!) please edit paths in circoletto.pl and try again - exiting\n"; exit; } + +#if (`which blast2` !~ /blast2/) { print "(!) cannot find blast2 - exiting\n"; exit; } +#if (`which formatdb` !~ /formatdb/) { print "(!) cannot find formatdb - exiting\n"; exit; } +# +#if (`perl -MBio::SearchIO -e 0` ne '') { print "(!) cannot find Bio::SearchIO - exiting\n"; exit; } +#if (`perl -MBio::Seq -e 0` ne '') { print "(!) cannot find Bio::Seq - exiting\n"; exit; } + + +unless ($online) { # ... i.e. offline + + unless ((defined($query) && defined($database)) || defined($blastout)) { die $usage; } + $out_dir = defined $out_dir ? "$out_dir/" : '.'; + $dir = "dir = $out_dir"; + if ($override) { $factor = 50; + } else { $factor = 1; } + $max_sequences = 200 * $factor; + $max_ribbons = 1000 * $factor; + $max_ribbons2untangle = 100; + $max_total_length = 2000000000; $max_total_length_in_mb = 2000; + $max_his_data = 500000; + +} else { + + $out_dir = '/labs/bat/www/tools/results/circoletto/'; + $dir = 'dir = /labs/bat/www/tools/results/circoletto'; + $gep = '-1'; + if ($override) { $factor = 50; + } else { $factor = 1; } + $max_sequences = 200 * $factor; + $max_ribbons = 1000 * $factor; + $max_ribbons2untangle = 100; + $max_total_length = 2000000000; $max_total_length_in_mb = 2000; + $max_his_data = 500000; + +} + +unless (defined $max_label_len) { $max_label_len = 20; } # 20 +unless (defined $out_type) { $out_type = 'png'; } +unless (defined $e_value) { $e_value = '1e-10'; } +unless (defined $gep) { $gep = '-1'; } +unless (defined $flt) { $flt = 0; } +unless (defined $score2colour) { $score2colour = 'bit'; } +unless (defined $scoreratio2colour) { $scoreratio2colour = 'max'; } +unless (defined $abscolour) { $abscolour = 0; } +unless (defined $maxB1) { $maxB1 = 50; } +unless (defined $maxG2) { $maxG2 = 75; } +unless (defined $maxO3) { $maxO3 = 99.9999; } +unless (defined $best_hit) { $best_hit = 0; } +unless (defined $best_hit_type) { $best_hit_type = 'entry'; } +unless (defined $w_hits) { $w_hits = 0; } +unless (defined $annocolour) { $annocolour = 'scores'; } +unless (defined $invertcolour) { $invertcolour = 0; } +unless (defined $no_labels) { $no_labels = 0; } +unless (defined $dirty_labels) { $dirty_labels = 0; } +unless (defined $untangling_off) { $untangling_off = 0; } +unless (defined $revcomp_q) { $revcomp_q = 0; } +unless (defined $revcomp_d) { $revcomp_d = 0; } +unless (defined $reverse_qorder) { $reverse_qorder = 0; } +unless (defined $reverse_dorder) { $reverse_dorder = 0; } +unless (defined $reverse_qorient) { $reverse_qorient = 0; } +unless (defined $reverse_dorient) { $reverse_dorient = 0; } +unless (defined $hide_orient_lights) { $hide_orient_lights = 0; } +unless (defined $out_size) { $out_size = '1000p'; } else { $out_size .= 'p'; } +unless (defined $tblastx) { $tblastx = 0; } +unless (defined $override) { $override = 0; } +unless (defined $ltrphyler) { $ltrphyler = 0; } +unless (defined $cpus) { $cpus = 1; } +unless (defined $ribocolours2allow) { $ribocolours2allow = "(blue|green|orange|red)"; } +unless (defined $z_by) { $z_by = "score"; } +if ($ltrphyler && $tblastx) { $lc_flt = '-U T'; } else { $lc_flt = ''; } + +open STDERR, '>/dev/null'; + +#green fully opaque +#green_a1 16% transparent +#green_a2 33% transparent +#green_a3 50% transparent +#green_a4 66% transparent +#green_a5 83% transparent +%kolours = ( +'query' , 'vvlgrey', +'database', 'dgrey', +'defdom' , 'black' +); +%ribocolours = ( +'q1' , 'blue', +'q2' , 'green', +'q3' , 'orange', +'q4' , 'red' +); +%score2colour4report = ( +'bit' , 'bitscore', +'eval' , 'E-value', +'id' , '% identity' +); +%scoreratio2colour4report = ( +'max' , 'score/max', +'minmax' , '(score-min)/(max-min)' +); +$greenlight = 'lgreen'; +$redlight = 'lred'; + +$chr_order = 'chromosomes_order = '; +if ($reverse_qorient || $reverse_dorient) { $chr2rev = 'chromosomes_reverse = '; } else { $chr2rev = ''; } + +if (defined($query) && defined($database)) { + + print "(?) going through the FASTA files\n"; + + #`perl -pi -e 's/\r//g' $query` unless $online; + $flag = 0; + open (FASTA, "$query"); + while (<FASTA>) { + chomp; + if (/^>/) { + $label = (split / /, $_)[0]; +# update annotation FASTA loading when you change these + $label =~ s/^>//; + $label =~ s/[^\w\-.]/_/g unless $dirty_labels; + $label =~ s/_{1,}/./g unless $dirty_labels; + $uniq_label = substr($label,0,$max_label_len); + while (defined($uniq_labels{$uniq_label})) { + $randomer4label = int(rand(1001)); + $randomer4label = sprintf "%04s", $randomer4label; + $uniq_label = substr($label,0,$max_label_len-4) . "$randomer4label"; + } + $uniq_labels{$label} = $uniq_label; + $uniq_labels{$uniq_label} = $uniq_label; + $original_labels{$uniq_label} = $_; + $original_labels{$uniq_label} =~ s/^>//; + $label = $uniq_label; + ++$query_entries{$label}; + push(@{$ordered{query}}, $label); + $flag = 1; + } elsif (/\w/) { + s/\s//g; + $sequences{$label} .= $_; + $flag = 0; + } + } + close FASTA; + + foreach $label (keys %query_entries) { + if (length($sequences{$label}) > 20) { + $seq = Bio::Seq->new(); + if(! $seq->validate_seq($sequences{$label}) ) { + print "(!) query $label has invalid sequence - exiting\n"; exit; + } else { + $seq->seq($sequences{$label}); + ++$q_seq_types{$seq->alphabet}; + } + } + } + if (keys(%q_seq_types) == 1) { + foreach $type (keys %q_seq_types) { + $q_seq_type = $type; + } + } else { + print "(!) your query file apparently has no sequence label(s), or both nucleotide and amino acid sequences,\nor confusing/invalid characters, or too little sequence - exiting\n"; exit; + } + + if ($flag) { print "(!) your query file is incomplete (most probably) - exiting\n"; exit; + } else { + if (keys(%query_entries) == 0) { print "(!) your query file is either empty or not in right format - exiting\n"; exit; + } elsif (keys(%query_entries) > $max_sequences) { print "(!) your query file contains too many sequences (" . keys(%query_entries) . " - we allow up to $max_sequences) - exiting\n"; exit; + } else { print "(=) " . keys(%query_entries) . " entries found in query file\n"; } + } + + if ($q_seq_type eq 'dna' && $revcomp_q) { print "(?) reverse complementing query DNA sequences\n"; } + $clean = ''; + $#non_uniq_labels = -1; + foreach $label (sort keys %query_entries) { + if ($q_seq_type eq 'dna' && $revcomp_q) { $clean_seq = revcomp($sequences{$label}); + } else { $clean_seq = $sequences{$label}; } + $clean .= ">$label\n$clean_seq\n"; + + if ($query_entries{$label} > 1) { + push(@non_uniq_labels, $label); + } + } + if (scalar(@non_uniq_labels) > 0) { + print "(!) the max label length (currently $max_label_len) causes some query sequence labels\n(" . join(", ",@non_uniq_labels) . ") to be redundant... please change the first $max_label_len characters accordingly - exiting\n"; exit; + } + + if ($reverse_qorder) { + print "(?) reversing order of query ideograms\n"; + @{$ordered{query}} = reverse(@{$ordered{query}}); + } + + open (CLEAN, ">$query.clean"); + print CLEAN $clean; + close CLEAN; + + if ($query ne $database) { + + $flag = 0; + #`perl -pi -e 's/\r//g' $database` unless $online; + open (FASTA, "$database"); + while (<FASTA>) { + chomp; + if (/^>/) { + $label = (split / /, $_)[0]; + $label =~ s/^>//; + $label =~ s/[^\w\-.]/_/g unless $dirty_labels; + $label =~ s/_{1,}/./g unless $dirty_labels; + $uniq_label = substr($label,0,$max_label_len); + while (defined($uniq_labels{$uniq_label})) { + $randomer4label = int(rand(1001)); + $randomer4label = sprintf "%04s", $randomer4label; + $uniq_label = substr($label,0,$max_label_len-4) . "$randomer4label"; + } + $uniq_labels{$label} = $uniq_label; + $uniq_labels{$uniq_label} = $uniq_label; + $original_labels{$uniq_label} = $_; + $original_labels{$uniq_label} =~ s/^>//; + $label = $uniq_label; + ++$database_entries{$label}; + push(@{$ordered{database}}, $label); + $flag = 1; + } elsif (/\w/) { + s/\s//g; + unless (defined($query_entries{$label})) { + $sequences{$label} .= $_; + } + $flag = 0; + } + } + close FASTA; + + foreach $label (keys %database_entries) { + if (length($sequences{$label}) > 20) { + $seq = Bio::Seq->new(); + if(! $seq->validate_seq($sequences{$label}) ) { + print "(!) database entry $label has invalid sequence - exiting\n"; exit; + } else { + $seq->seq($sequences{$label}); + ++$d_seq_types{$seq->alphabet}; + #print $seq->alphabet . "\t$original_labels{$label}\n"; + } + } + } + if (keys(%d_seq_types) == 1) { + foreach $type (keys %d_seq_types) { + $d_seq_type = $type; + } + } else { + print "(!) your database file apparently has no sequence label(s), or both nucleotide and amino acid sequences,\nor confusing/invalid characters, or too little sequence - exiting\n"; exit; + } + + if ($flag) { print "(!) your database file is incomplete (most probably)\n"; + } else { + if (keys(%database_entries) == 0) { print "(!) your database file is either empty or not in right format - exiting\n"; exit; + } elsif (keys(%database_entries) > $max_sequences) { print "(!) your database file contains too many sequences (" . keys(%database_entries) . " - we allow up to $max_sequences) - exiting\n"; exit; + } else { print "(=) " . keys(%database_entries) . " entries found in database file\n"; } + } + + if ($d_seq_type eq 'dna' && $revcomp_d) { print "(?) reverse complementing database DNA sequences\n"; } + $clean = ''; + $#non_uniq_labels = -1; + foreach $label (sort keys %database_entries) { + if ($d_seq_type eq 'dna' && $revcomp_d) { $clean_seq = revcomp($sequences{$label}); + } else { $clean_seq = $sequences{$label}; } + $clean .= ">$label\n$clean_seq\n"; + + if ($database_entries{$label} > 1) { + push(@non_uniq_labels, $label); + } + } + if (scalar(@non_uniq_labels) > 0) { print "(!) the max label length (currently $max_label_len) causes some database sequence labels\n(" . join(", ",@non_uniq_labels) . ") to be redundant... please change the first $max_label_len characters accordingly - exiting\n"; exit; } + + if ($reverse_dorder) { + print "(?) reversing order of database ideograms\n"; + @{$ordered{database}} = reverse(@{$ordered{database}}); + } + + open (CLEAN, ">$database.clean"); + print CLEAN $clean; + close CLEAN; + + } else { + print "(?) query and database seem the same (filename and/or content), we assume you're running an all against all\n"; + $d_seq_type = $q_seq_type; + } + + if (keys(%sequences) > $max_sequences) { + print "(!) you have too many sequences in total (" . keys(%sequences) . " - we allow up to $max_sequences) - exiting\n"; exit; + } + $max_hits_per_query = sprintf "%.0f", $max_ribbons / keys(%sequences); + $lensum = 0; + foreach $label (keys %sequences){ + $lengths{$label} = length($sequences{$label}); + $lensum += length($sequences{$label}); + } + %sequences = (); + + + if ($q_seq_type eq 'dna' && $d_seq_type eq 'dna') { + $seq_type4formatdb = 'F'; + $database4formatdb2check = "$database.clean.nsq"; + if ($tblastx) { + if ($lensum > $max_total_length/200 && !$tblastx) { + print "(!) running tBLASTx with your sequences may overload our server,\neither load less data (<".($max_total_length_in_mb/200)."Mb) or tBLASTx-it-yourself and come back - exiting\n"; exit; + } elsif ($lensum > $max_total_length/500 && !$tblastx) { + print "(!) running tBLASTx with only one CPU due to your largish dataset - may take a while\n"; + $cpus = 1; + } + $program = 'tblastx'; + } else { + $program = 'blastn'; + } + } elsif ($q_seq_type eq 'protein' && $d_seq_type eq 'protein') { + $seq_type4formatdb = 'T'; + $database4formatdb2check = "$database.clean.psq"; + $program = 'blastp'; + } elsif ($q_seq_type eq 'dna' && $d_seq_type eq 'protein') { + $seq_type4formatdb = 'T'; + $database4formatdb2check = "$database.clean.psq"; + $program = 'blastx'; + } elsif ($q_seq_type eq 'protein' && $d_seq_type eq 'dna') { + $seq_type4formatdb = 'F'; + $database4formatdb2check = "$database.clean.nsq"; + $program = 'tblastn'; + } + print "(?) formatdb is running\n"; + my $formatdb = `nice formatdb -i $database.clean -p $seq_type4formatdb -l $database.clean.formatdb.log`; + $status = $? >> 8; unless ($status==0) { print "(!) there was an error running formatdb on your database - exiting\n"; exit; } + if ($flt) {$flt = 'T'; + } else { $flt = 'F'; } + $blastout = "$out_dir" . "cl$randomer.blasted"; + if ($best_hit) { + $v4blast = 1; + $b4blast = 1; + } else { + if (defined($mhits)) { + $v4blast = $mhits; + $b4blast = $mhits; + } else { + $v4blast = $max_sequences; + $b4blast = $max_sequences; + } + } + print "(?) $program is running with -F $flt -e $e_value -E $gep -v $v4blast -b $b4blast $lc_flt\n"; + my $blast = `nice blastall -p $program -i $query.clean -d $database.clean -F $flt -e $e_value -E $gep -v $v4blast -b $b4blast -a $cpus $lc_flt > $blastout`; + $status = $? >> 8; unless ($status==0) { print "(!) there was an error running $program with your files - exiting\n"; exit; } + +} + +print "(?) going through the BLAST output\n"; +#`perl -pi -e 's/\r//g' $blastout` unless $online; +$grep4queries = `grep -c '^Query=' $blastout`; +chomp $grep4queries; +unless ($grep4queries =~ /\d/ && $grep4queries > 0) { print "(!) could not find any queries, or BLAST output is in wrong format - exiting\n"; exit; } +$max_hits_per_query = sprintf "%.0f", $max_ribbons / $grep4queries; +%hsp = (); +my $in = new Bio::SearchIO(-format => 'blast', + -file => "$blastout"); +while( my $result = $in->next_result ) { + $blastout_query = $result->query_name; + $uniq_label = substr($blastout_query,0,$max_label_len); + if (defined($uniq_labels{$uniq_label})) { + $blastout_query = $uniq_labels{$uniq_label}; + } else { + $blastout_query =~ s/[^\w\-.]/_/g unless $dirty_labels; + $blastout_query =~ s/_{1,}/./g unless $dirty_labels; + $uniq_label = substr($blastout_query,0,$max_label_len); + while (defined($uniq_labels{$uniq_label})) { + $randomer4label = int(rand(1001)); + $randomer4label = sprintf "%04s", $randomer4label; + $uniq_label = substr($blastout_query,0,$max_label_len-4) . "$randomer4label"; + } + $uniq_labels{$blastout_query} = $uniq_label; + $uniq_labels{$uniq_label} = $uniq_label; + $original_labels{$blastout_query} = $result->query_name; + $blastout_query = $uniq_label; + } + if ($blastout_query =~ /\w/) { + $lengths{$blastout_query} = $result->query_length; + unless (defined($query_entries{$blastout_query})) { + push(@{$ordered{query}}, $blastout_query); + } + $query_entries{$blastout_query} = 1; + while( my $hit = $result->next_hit ) { + $blastout_hit = $hit->name; + $uniq_label = substr($blastout_hit,0,$max_label_len); + if (defined($uniq_labels{$uniq_label})) { + $blastout_hit = $uniq_labels{$uniq_label}; + } else { + $blastout_hit =~ s/[^\w\-.]/_/g unless $dirty_labels; + $blastout_hit =~ s/_{1,}/./g unless $dirty_labels; + $uniq_label = substr($blastout_hit,0,$max_label_len); + while (defined($uniq_labels{$uniq_label})) { + $randomer4label = int(rand(1001)); + $randomer4label = sprintf "%04s", $randomer4label; + $uniq_label = substr($blastout_hit,0,$max_label_len-4) . "$randomer4label"; + } + $uniq_labels{$blastout_hit} = $uniq_label; + $uniq_labels{$uniq_label} = $uniq_label; + $original_labels{$blastout_hit} = $hit->name; + $blastout_hit = $uniq_label; + } + if ($blastout_hit =~ /\w/) { + $lengths{$blastout_hit} = $hit->length; + unless (defined($database_entries{$blastout_hit})) { + push(@{$ordered{database}}, $blastout_hit); + } + $database_entries{$blastout_hit} = 1; + while( my $hithsp = $hit->next_hsp ) { + if ($blastout_query eq $blastout_hit && $hithsp->hsp_length/$lengths{$blastout_query} <= 0.5 ) { $go = 1; + } elsif ($blastout_query ne $blastout_hit) { $go = 1; + } else { $go = 0; } + if ($go) { + if ($score2colour eq 'eval') { + if ($hithsp->evalue == 0) { + $score = 181; + } else { + $conv2e = sprintf "%.0e", $hithsp->evalue; + @split = split("e-", $conv2e); + $score = "$split[1].$split[0]"; + } + } elsif ($score2colour eq 'bit') { $score = $hithsp->bits; + } elsif ($score2colour eq 'id') { $score = $hithsp->percent_identity; + } else { $score = $hithsp->bits; } + $qstart = $hithsp->start('query'); + $qend = $hithsp->end('query'); + #$qstrand = $hithsp->strand('query'); + $hstrand = $hithsp->strand('hit'); + if ($hstrand == 0 || $hstrand == 1) { + $hstart = $hithsp->start('hit'); + $hend = $hithsp->end('hit'); + } elsif ($hstrand == -1) { + $hend = $hithsp->start('hit'); + $hstart = $hithsp->end('hit'); + } + $mem{all}{$blastout_query}{$score}{$blastout_hit}{"$qstart $qend"}{"$hstart $hend"} = $hithsp->hsp_length; + $entries_per_set{all}{$blastout_query} = 1; + $entries_per_set{all}{$blastout_hit} = 1; + $alnlens{all}{$hithsp->hsp_length} = 1; + $scores{all}{$score} += $hithsp->hsp_length; + ++$hsp{all}; + ++$hsp_per_query{$blastout_query}; + if ($hsp_per_query{$blastout_query} <= $max_hits_per_query) { + $mem{max}{$blastout_query}{$score}{$blastout_hit}{"$qstart $qend"}{"$hstart $hend"} = $hithsp->hsp_length; + $entries_per_set{max}{$blastout_query} = 1; + $entries_per_set{max}{$blastout_hit} = 1; + $alnlens{max}{$hithsp->hsp_length} = 1; + $scores{max}{$score} += $hithsp->hsp_length; + ++$hsp{max}; + } + } + } + } + } + } +} +unless (defined($query) && defined($database)) { + if (keys(%query_entries) == 0) { print "(!) could not find any queries, or BLAST output is in wrong format - exiting\n"; exit; + } elsif (keys(%query_entries) > $max_sequences) { print "(!) there are too many queries (" . keys(%query_entries) . " - we allow up to $max_sequences) - exiting\n"; exit; + } else { print "(=) " . keys(%query_entries) . " queries found\n"; } + + if (keys(%database_entries) == 0) { print "(!) could not find any database entries, or BLAST output is in wrong format - exiting\n"; exit; + } elsif (keys(%database_entries) > $max_sequences) { print "(!) there are too many database entries (" . keys(%database_entries) . " - we allow up to $max_sequences) - exiting\n"; exit; + } else { print "(=) " . keys(%database_entries) . " database entries found\n"; } + + if (keys(%lengths) > $max_sequences) { print "(!) you have too many entries in total (" . keys(%lengths) . " - we allow up to $max_sequences) - exiting\n"; exit; } + + if ($reverse_qorder) { + print "(?) reversing order of query ideograms\n"; + @{$ordered{query}} = reverse(@{$ordered{query}}); + } + if ($reverse_dorder) { + print "(?) reversing order of database ideograms\n"; + @{$ordered{database}} = reverse(@{$ordered{database}}); + } +} + +if ($hsp{all} == 0) { + print "(!) no hits found so maybe you can try different datasets or a more relaxed E-value - exiting\n"; exit; +} elsif ($hsp{all} > $max_ribbons) { + print "(!) $hsp{all} local alignments are too many (we allow up to $max_ribbons for clarity),\n\tso please consider stricter E-value or smaller input datasets\n"; + print "(?) in the meantime, we are switching to $max_hits_per_query hits per query\n"; + if ($hsp{max} > $max_ribbons) { + print "(!) $hsp{max} local alignments are still too many (we allow up to $max_ribbons) - exiting\n"; exit; + } else { + $set2consider = 'max'; + } +} else { + $set2consider = 'all'; +} + +$lensum2show = 0; +foreach $label (keys %{$entries_per_set{$set2consider}}) { $lensum2show += $lengths{$label}; } +if ($lensum2show > $max_total_length) { print "(!) the total length of your sequences to show is more than $max_total_length_in_mb megabases (currently $lensum2show bp) - exiting\n"; exit; } +$orient_light_width = int($lensum2show * 0.004); + +if (defined($annotation)) { + print "(?) loading annotation\n"; + open (ANNOTATION, "$annotation"); + while (<ANNOTATION>) { + if (!/^\#/ && /\w/) { + chomp; + @tabs = (split /\s+/); + $label = shift(@tabs); + $label = (split / /, $_)[0]; +# always update from above + $label =~ s/^>//; + $label =~ s/[^\w\-.]/_/g unless $dirty_labels; + $label =~ s/_{1,}/./g unless $dirty_labels; + if (defined($uniq_labels{$label})) { + $label = $uniq_labels{$label}; + } else { + $label = substr($label,0,$max_label_len); + } + if (defined($lengths{$label})) { + $kolour = shift(@tabs); + if ($kolour ne '-') { + $annotations{$label}{ideo_colour} = $kolour; + } + foreach $tab (@tabs) { + if ($tab =~ /:/) { + $domcol = (split /:/, $tab)[1]; + $tab = (split /:/, $tab)[0]; + } else { + $domcol = $kolours{'defdom'}; + } + $domfrom = (split /-/, $tab)[0]; + if ($domfrom < 0) { + $domfrom = 0; + } + $domto = (split /-/, $tab)[1]; + if ($domto > $lengths{$label}) { + $domto = $lengths{$label}; + } + if ($domfrom < $domto) { + $annotations{$label}{domains}{$domfrom}{to} = $domto; + $annotations{$label}{domains}{$domfrom}{col} = $domcol; + } + } + } + } + } + close ANNOTATION; + if (keys(%annotations) > 0) { print "(=) annotation loaded for " . keys(%annotations) . " entries\n"; + } else { print "(!) no annotation loaded although you provided a file, please check\n"; } +} + +print "(?) creating ribbons from $hsp{$set2consider} local alignments\n"; +if ($annocolour ne 'scores' && $invertcolour) { print "(!) cannot colour ribbons by both invertion and ideogram/domain/rainbow colours - exiting\n"; exit; +} elsif ($annocolour eq 'query') { print "(?) colouring ribbons by query ideogram/domain colours and $score2colour4report{$score2colour}\n"; +} elsif ($annocolour =~ /query_rainbow/) { print "(?) colouring ribbons by query rainbow colours and $score2colour4report{$score2colour}\n"; +} elsif ($annocolour eq 'database') { print "(?) colouring ribbons by database ideogram/domain colours and $score2colour4report{$score2colour}\n"; +} elsif ($annocolour =~ /database_rainbow/) { print "(?) colouring ribbons by database rainbow colours and $score2colour4report{$score2colour}\n"; +} elsif ($invertcolour) { print "(?) colouring ribbons by invertion and $score2colour4report{$score2colour}\n"; +} else { print "(?) colouring ribbons by $score2colour4report{$score2colour}\n"; } +if ($z_by eq 'score') { print "(?) depth-ordering ribbons by score, highest-scoring at the top\n"; +} elsif ($z_by eq 'score_rev') { print "(?) depth-ordering ribbons by reverse score, lowest-scoring at the top\n"; +} elsif ($z_by eq 'alnlen') { print "(?) depth-ordering ribbons by alignment length, longest at the top\n"; +} else { print "(?) depth-ordering ribbons by reverse alignment length, shortest at the top\n"; } +if ($abscolour && $score2colour ne 'id') { print "(!) absolute colouring currently only allowed with % identity - disabling\n"; $abscolour = 0; } +if ($abscolour) { print "(?) using absolute colouring with blue<=$maxB1, green<=$maxG2, orange<=$maxO3, red>$maxO3\n"; +} else { print "(?) using '$scoreratio2colour4report{$scoreratio2colour}' ratio colouring with blue<=0.25, green<=0.50, orange<=0.75, red>0.75\n"; } + +foreach $entry (sort {$b<=>$a} keys %{$alnlens{$set2consider}}) { $maxalnlen = $entry; last; } +foreach $score (sort {$b<=>$a} keys %{$scores{$set2consider}}) { $maxscore = $score; last; } +foreach $score (sort {$a<=>$b} keys %{$scores{$set2consider}}) { $minscore = $score; last; } +print "(=) min and max $score2colour4report{$score2colour}: ".(sprintf "%.2f", $minscore)." and ".(sprintf "%.2f", $maxscore)."\n"; + +$cold = 0; $warm = 0; $hot = 0; +foreach $score (keys %{$scores{$set2consider}}) { + if ($abscolour && $score2colour eq 'id') { + if ( $score <= $maxB1) { $cold += $scores{$set2consider}{$score}; + } else { + if ( $score > $maxO3) { $hot += $scores{$set2consider}{$score}; } + $warm += $scores{$set2consider}{$score}; } + } else { + if ($scoreratio2colour eq 'minmax') { + $scoreratio = ($score-$minscore) / ($maxscore-$minscore); + } else { $scoreratio = $score / $maxscore; } # max + if ( $scoreratio <= 0.5) { $cold += $scores{$set2consider}{$score}; + } else { + if ( $scoreratio > 0.75) { $hot += $scores{$set2consider}{$score}; } + $warm += $scores{$set2consider}{$score}; } + } +} + +$hiscolours2allow = "(blue|green|orange|red)"; print "(?) checking histogram data\n"; +if ($cold > $max_his_data) { + if ($warm > $max_his_data) { + if ($hot > $max_his_data) { print "(!) too much histogram data - disabling\n"; $hiscolours2allow = 'black'; + } else { print "(!) too much histogram data, will only consider red ribbons\n"; $hiscolours2allow = 'red'; } + } else { print "(!) too much histogram data, will only consider 'warm' ribbons\n"; $hiscolours2allow = '(orange|red)'; } +} +unless ($ribocolours2allow eq "(blue|green|orange|red)") { print "(?) showing only $ribocolours2allow ribbons, histograms are not affected\n"; } + +open (LINKS, ">$blastout.links") || die "Cannot create $blastout.links"; +$uid = 0; +foreach $query (keys %{$mem{$set2consider}}) { + $entries_w_hits{$query} = 1; + $per_query = 0; + %best_hit_entries = (); + foreach $score (sort {$b<=>$a} keys %{$mem{$set2consider}{$query}}) { + if ($abscolour && $score2colour eq 'id') { + if ( $score <= $maxB1) { $colour = $ribocolours{q1}; + } elsif ($score > $maxB1 && $score <= $maxG2) { $colour = $ribocolours{q2}; + } elsif ($score > $maxG2 && $score <= $maxO3) { $colour = $ribocolours{q3}; + } else { $colour = $ribocolours{q4}; } + } else { + if ($scoreratio2colour eq 'minmax') { + $scoreratio = ($score-$minscore) / ($maxscore-$minscore); + } else { $scoreratio = $score / $maxscore; } # max + if ( $scoreratio <= 0.25) { $colour = $ribocolours{q1}; + } elsif ($scoreratio > 0.25 && $scoreratio <= 0.5 ) { $colour = $ribocolours{q2}; + } elsif ($scoreratio > 0.5 && $scoreratio <= 0.75) { $colour = $ribocolours{q3}; + } else { $colour = $ribocolours{q4}; } + } + $best_colour = $out_type eq 'png' ? $colour . "_a1" : "black_a1"; + foreach $database (keys %{$mem{$set2consider}{$query}{$score}}) { + if (($best_hit && $best_hit_type eq 'entry' && defined($best_hit_entries{$database})) || keys(%best_hit_entries)==0 || !$best_hit) { + $best_hit_entries{$database} = 1; + $entries_w_hits{$database} = 1; + foreach $qaln (keys %{$mem{$set2consider}{$query}{$score}{$database}}) { + $a_from = (split / /, $qaln)[0]; + $a_to = (split / /, $qaln)[1]; + if ($per_query == 0) { # i.e. best score + if ($annocolour ne 'scores' || $invertcolour) { $stroker = ",stroke_color=$colour"."_a1".",stroke_thickness=2"; + } else { $stroker = ",stroke_color=$best_colour,stroke_thickness=2"; } + } else { + if ($annocolour ne 'scores' || $invertcolour) { $stroker = ",stroke_color=$colour"."_a2".",stroke_thickness=1"; + } else { $stroker = ""; } + } + foreach $daln (keys %{$mem{$set2consider}{$query}{$score}{$database}{$qaln}}) { + $b_from = (split / /, $daln)[0]; + $b_to = (split / /, $daln)[1]; + if ($z_by =~ /score/) { + if ($z_by eq 'score') { $z = $score; + } else { $z = $maxscore - $score; } + } else { + if ($z_by eq 'alnlen') { $z = $mem{$set2consider}{$query}{$score}{$database}{$qaln}{$daln}; + } else { $z = $maxalnlen - $mem{$set2consider}{$query}{$score}{$database}{$qaln}{$daln}; } + } + use bignum; + $z = $z + 0; + no bignum; + $ribocolour = $colour . "_a3"; + if ($annocolour =~ /query/) { + if (defined($annotations{$query}{ideo_colour})) { $ribocolour = $annotations{$query}{ideo_colour} . "_a3"; + } else { $ribocolour = $kolours{query} . "_a3"; } + foreach $dom_from (sort {$a<=>$b} keys %{$annotations{$query}{domains}}) { + $buffer = (abs($dom_from - $annotations{$query}{domains}{$dom_from}{to}) + 1) * 0.25; + if ($b_from >= ($dom_from - $buffer) && $b_to > $dom_from && $b_from < $annotations{$query}{domains}{$dom_from}{to} && $b_to <= ($annotations{$query}{domains}{$dom_from}{to} + $buffer)) { + $ribocolour = $annotations{$query}{domains}{$dom_from}{col} . "_a3"; + last; + } + } + } elsif ($annocolour =~ /database/) { + if (defined($annotations{$database}{ideo_colour})) { $ribocolour = $annotations{$database}{ideo_colour} . "_a3"; + } else { $ribocolour = $kolours{database} . "_a3"; } + foreach $dom_from (sort {$a<=>$b} keys %{$annotations{$database}{domains}}) { + $buffer = (abs($dom_from - $annotations{$database}{domains}{$dom_from}{to}) + 1) * 0.25; + if ($b_from >= ($dom_from - $buffer) && $b_to > $dom_from && $b_from < $annotations{$database}{domains}{$dom_from}{to} && $b_to <= ($annotations{$database}{domains}{$dom_from}{to} + $buffer)) { + $ribocolour = $annotations{$database}{domains}{$dom_from}{col} . "_a3"; + last; + } + } + } + if ($colour =~ /$hiscolours2allow/) { + if ($a_from > $a_to) { for ($i=$a_to;$i<=$a_from;$i++) { ++$histogram{$query}{$i}{$colour}; } + } else { for ($i=$a_from;$i<=$a_to;$i++) { ++$histogram{$query}{$i}{$colour}; }} + } + if ($b_from > $b_to) { + $inverter = ',twist=1'; + if ($invertcolour) { $ribocolour = 'lime_a3'; } + $dblink = "$database $b_to $b_from"; + if ($colour =~ /$hiscolours2allow/) { for ($i=$b_to;$i<=$b_from;$i++) { ++$histogram{$database}{$i}{$colour}; }} + } else { + $inverter = ',flat=1'; + if ($invertcolour) { $ribocolour = 'black_a3'; } + $dblink = "$database $b_from $b_to"; + if ($colour =~ /$hiscolours2allow/) { for ($i=$b_from;$i<=$b_to;$i++) { ++$histogram{$database}{$i}{$colour}; }} + } + if ($ribocolour =~ /$ribocolours2allow/ || $stroker =~ /$ribocolours2allow/) { + print LINKS "$query $qaln $dblink color=$ribocolour,z=$z,radius=0.98r$stroker$inverter\n"; + ++$uid; + } + ++$per_query; + } + } + } + } + if ($best_hit && $best_hit_type eq 'local') { last; } + } +} +close LINKS; +%mem = (); + +if ($hide_orient_lights) { print "(?) hiding orientation lights\n"; } + +if ($reverse_qorient) { print "(?) reversing orientation of query ideograms\n"; } +open (KARYOTYPE, ">$blastout.karyotype") || die "Cannot create $blastout.karyotype"; +foreach $entry (@{$ordered{query}}) { + if (($w_hits && defined($entries_w_hits{$entry})) || !$w_hits) { + if (defined($lengths{$entry}) && $lengths{$entry} > 0 && $entry =~ /\w/) { + $chr_order .= "$entry,"; + if (defined($annotations{$entry}{ideo_colour})) { print KARYOTYPE "chr - $entry $entry 0 $lengths{$entry} $annotations{$entry}{ideo_colour}\n"; + } else { print KARYOTYPE "chr - $entry $entry 0 $lengths{$entry} $kolours{query}\n"; } + $ultimately_shown{$entry} = 1; + if (defined($annotations{$entry}{domains})) { + $domid = 1; + foreach $from (sort {$a<=>$b} keys %{$annotations{$entry}{domains}}) { + print KARYOTYPE "band $entry dom$domid dom$domid $from $annotations{$entry}{domains}{$from}{to} $annotations{$entry}{domains}{$from}{col}\n"; + ++$domid; + } + } + if ($reverse_qorient) { $chr2rev .= "$entry,"; } + unless ($hide_orient_lights) { + if ($orient_light_width >= $lengths{$entry}/2 || $orient_light_width == 0) { $orient_light_width2use = int($lengths{$entry}/2); + } else { $orient_light_width2use = $orient_light_width; } + print KARYOTYPE "band $entry beg beg 0 $orient_light_width2use $greenlight\n"; + print KARYOTYPE "band $entry end end ".($lengths{$entry}-$orient_light_width2use)." $lengths{$entry} $redlight\n"; + } + } else {print "(!) a query entry has been problematic - ignoring\n"; } + } +} + +if ($reverse_dorient) { print "(?) reversing orientation of database ideograms\n"; } +$chr_radius = 'chromosomes_radius = '; #hs1:0.8r;a:0.9r;d:0.8r +foreach $entry (@{$ordered{database}}) { + if (($w_hits && defined($entries_w_hits{$entry})) || !$w_hits) { + if (defined($lengths{$entry}) && $lengths{$entry} > 0 && $entry =~ /\w/) { + unless (defined($query_entries{$entry})) { + $chr_order .= "$entry,"; + $chr_radius .= "$entry:0.92r;"; + if (defined($annotations{$entry}{ideo_colour})) { print KARYOTYPE "chr - $entry $entry 0 $lengths{$entry} $annotations{$entry}{ideo_colour}\n"; + } else { print KARYOTYPE "chr - $entry $entry 0 $lengths{$entry} $kolours{database}\n"; } + $ultimately_shown{$entry} = 1; + if (defined($annotations{$entry}{domains})) { + $domid = 1; + foreach $from (sort {$a<=>$b} keys %{$annotations{$entry}{domains}}) { + print KARYOTYPE "band $entry dom$domid dom$domid $from $annotations{$entry}{domains}{$from}{to} $annotations{$entry}{domains}{$from}{col}\n"; + ++$domid; + } + } + if ($reverse_dorient) { $chr2rev .= "$entry,"; } + unless ($hide_orient_lights) { + if ($orient_light_width >= $lengths{$entry}/2 || $orient_light_width == 0) { $orient_light_width2use = int($lengths{$entry}/2); + } else { $orient_light_width2use = $orient_light_width; } + print KARYOTYPE "band $entry beg beg 0 $orient_light_width2use $greenlight\n"; + print KARYOTYPE "band $entry end end ".($lengths{$entry}-$orient_light_width2use)." $lengths{$entry} $redlight\n"; + } + } + } else { print "(!) a database entry has been problematic - ignoring\n"; } + } +} +chop $chr_order; +chop $chr_radius; +chop $chr2rev; +close KARYOTYPE; + +if ($w_hits) { + $chr = 'chromosomes = '; + print "(?) only " . keys(%entries_w_hits) . " entries with hits will be shown\n"; + foreach $entry (sort keys %entries_w_hits) { $chr .= "$entry;"; } chop $chr; +} else { $chr = 'chromosomes_display_default = yes'; } + +if ($best_hit) { + if ($best_hit_type eq 'entry') { print "(?) only $uid best local alignments will be shown (= all local alignments of each best hit)\n"; + } else { print "(?) only $uid best local alignments will be shown (= single best local alignment for each best hit)\n"; } +} + +open (ORILABELS, ">$blastout.original_labels") || die "Cannot create $blastout.original_labels"; +$orilabels = ''; +foreach $label (sort keys %original_labels) { + if (defined($ultimately_shown{$label})) { + if ($original_labels{$label} ne $label) { $orilabels .= "$original_labels{$label}\t$label\n"; } + } +} +if ($orilabels =~ /\w/) { print ORILABELS "# original and Circoletto, tab-delimited (which might not be apparent if original label has whitespace)\n$orilabels"; } +close ORILABELS; + +print "(?) creating histogram\n" if %histogram; +@sorted_colours = ( +'red', +'orange', +'green', +'blue', +); +$max4his = 0; +foreach $label (keys %histogram) { + foreach $i (keys %{$histogram{$label}}) { + $tmp = ''; + $sum = 0; + foreach $colour (@sorted_colours[0..$#sorted_colours]) { + if (!defined($histogram{$label}{$i}{$colour})) { $histogram{$label}{$i}{$colour} = 0; } + $tmp .= "$histogram{$label}{$i}{$colour}.0,"; + $sum += $histogram{$label}{$i}{$colour}; + } + chop $tmp; + if ($tmp =~ /\d/) { $hismem{$label}{$tmp}{$i} = 1; } + if ($max4his <= $sum) { $max4his = $sum; } + } +} +open (HIS, ">$blastout.his") || die "Cannot create $blastout.his"; +foreach $label (sort keys %hismem) { + foreach $numbers (keys %{$hismem{$label}}) { + $from = 'na'; + $prev = 'na'; + foreach $coord (sort {$a<=>$b} keys %{$hismem{$label}{$numbers}}) { + if ($from eq 'na') { $from = $coord; } + if ($prev ne 'na' && ($coord > ($prev + 1) || $coord < ($prev - 1))) { + print HIS "$label $from $prev $numbers\n"; + $from = $coord; + } + $prev = $coord; + } + print HIS "$label $from $prev $numbers\n"; + } +} +close HIS; +$axis_thickness = 1; +$axis_spacing = int($max4his / 4); +if ($axis_spacing == 0) { + $axis_thickness = 0; + $axis_spacing = 1000; +} + +unless ($untangling_off) { + if ($reverse_qorder || $reverse_dorder) { print "(!) you have selected to reverse ideogram order, skipping untangling the $uid ribbons\n"; + } else { + if (keys(%lengths) <= 2) { print "(!) skipping untangling the $uid ribbons for less than 3 sequences\n"; + } elsif ($uid <= $max_ribbons2untangle) { + print "(?) untangling the $uid ribbons\n"; + `orderchr -links $blastout.links -karyotype $blastout.karyotype > $blastout.chr_order`; + open (ORDER, "$blastout.chr_order") || die "Cannot read $blastout.chr_order"; + while (<ORDER>) { + if (/chromosomes_order/) { + chomp; + $chr_order = $_; + } + } + close ORDER; + } else { print "(!) $uid ribbons will take too long to untangle (we allow up to $max_ribbons2untangle) - skipping\n"; } + } +} else { print "(?) untangling of ribbons has been switched off\n"; } + + +if ($annocolour =~ /rainbow/) { + if ($annocolour =~ /grey/) { $rainbow_palette = 'grey'; } else { $rainbow_palette = 'colour'; } + if ($annocolour =~ /query/) { $rainbow_on = 'query'; } else { $rainbow_on = 'database'; } + print "(?) drawing a $rainbow_palette rainbow based on the $rainbow_on\n"; + $rainbow_length = 0; + @split_chr = (split /,/, (split / = /,$chr_order)[1]); + foreach $chr (@split_chr) { + if ($rainbow_on eq 'query' && $query_entries{$chr} || $rainbow_on eq 'database' && $database_entries{$chr}) { + $rainbow_transl{$chr} += $rainbow_length; + $rainbow_length += $lengths{$chr}; + } + } + open (LINKS, "$blastout.links") || die "Cannot read $blastout.links"; + while (<LINKS>) { + chomp; + @split_link = (split /\s/); + if ( $rainbow_on eq 'query' && $query_entries{$split_link[0]}) { $rainbow_colour = ceil( (($rainbow_transl{$split_link[0]} + ($split_link[1]+$split_link[2])/2) / $rainbow_length) / (1/7) ); + } elsif ($rainbow_on eq 'database' && $database_entries{$split_link[3]}) { $rainbow_colour = ceil( (($rainbow_transl{$split_link[3]} + ($split_link[4]+$split_link[5])/2) / $rainbow_length) / (1/7) ); } + $rainbow_colour = "$rainbow_palette-rainbow-".$rainbow_colour.'_a2'; + s/color=[^,]+,/color=$rainbow_colour,/; + $rainbow_links .= "$_\n"; + } + close LINKS; + open (LINKS, ">$blastout.links") || die "Cannot create $blastout.links"; + print LINKS $rainbow_links; + close LINKS; +} + +if ($no_labels) { + print "(?) labels have been switched off\n"; + $show_label = "no"; +} else { + $show_label = "yes"; + $label_size = 11; + if ( $out_size eq '500p') { $label_size += -1; + } elsif ( $out_size eq '1000p') { $label_size += 1; + } elsif ( $out_size eq '2000p') { $label_size += 2; } + if ($out_type eq 'svg') { $label_size += 1; } + $label_size .= 'p'; +} + +@blastreport2split = (split '/',$blastout); +#$cleanblastreport = pop(@blastreport2split); +$out_name = defined $out_name ? $out_name : pop(@blastreport2split); +$record_limit = $max_ribbons * 2; +open (CONF, ">$blastout.conf") || die "Cannot create $blastout.conf"; +print CONF " +<<include etc/housekeeping.conf>> +<<include etc/colors_fonts_patterns.conf>> + +<colors> +red = 247, 42, 66 +green = 51, 204, 94 +blue = 54, 116, 217 +orange = 255, 136, 0 +lime = 186, 255, 0 + +#colour-rainbow-1 = 248, 12, 18 +#colour-rainbow-2 = 238, 17, 0 +#colour-rainbow-3 = 255, 51, 17 +#colour-rainbow-4 = 255, 68, 34 +#colour-rainbow-5 = 255, 102, 68 +#colour-rainbow-6 = 255, 153, 51 +#colour-rainbow-7 = 254, 174, 45 +#colour-rainbow-8 = 204, 187, 51 +#colour-rainbow-9 = 208, 195, 16 +#colour-rainbow-10 = 170, 204, 34 +#colour-rainbow-11 = 105, 208, 37 +#colour-rainbow-12 = 34, 204, 170 +#colour-rainbow-13 = 18, 189, 185 +#colour-rainbow-14 = 17, 170, 187 +#colour-rainbow-15 = 68, 68, 221 +#colour-rainbow-16 = 51, 17, 187 +#colour-rainbow-17 = 59, 12, 189 +#colour-rainbow-18 = 68, 34, 153 + +# Red web color Hex#FF0000 +# Orange color wheel Orange Hex#FF7F00 +# Yellow web color Hex#FFFF00 +# Green X11 Electric Green HTML/CSS “Lime” Color wheel green Hex#00FF00 +# Blue web color Hex#0000FF +# Indigo Electric Indigo Hex#4B0082 +# Violet Electric Violet Hex#8B00FF +colour-rainbow-1 = 255, 0, 0 +colour-rainbow-2 = 255, 127, 0 +colour-rainbow-3 = 255, 255, 0 +colour-rainbow-4 = 0, 255, 0 +colour-rainbow-5 = 0, 0, 255 +colour-rainbow-6 = 75, 0, 130 +colour-rainbow-7 = 143, 0, 255 + +grey-rainbow-1 = 230, 230, 230 +grey-rainbow-2 = 200, 200, 200 +grey-rainbow-3 = 170, 170, 170 +grey-rainbow-4 = 140, 140, 140 +grey-rainbow-5 = 110, 110, 110 +grey-rainbow-6 = 80, 80, 80 +grey-rainbow-7 = 50, 50, 50 +</colors> + +<ideogram> +<spacing> +default = 0.006r +break = 10u +axis_break_at_edge = no +axis_break = no +axis_break_style = 1 +</spacing> +thickness = 12p +stroke_thickness = 1 +stroke_color = black +fill = yes +fill_color = lgrey +radius = 0.66r +show_label = $show_label +label_with_tag = yes +label_font = condensed +label_color = grey +label_radius = dims(ideogram,radius) + 0.104r +label_size = $label_size +band_stroke_thickness = 0 +show_bands = yes +fill_bands = yes +</ideogram> + +show_ticks = yes +show_tick_labels = no +<ticks> +radius = dims(ideogram,radius_outer) +multiplier = 1e-6 +<tick> +spacing = 100u +rspacing = 0.1 +spacing_type = relative +size = 5p +thickness = 1p +color = vvvvdgrey +show_label = no +label_size = 10p +label_offset = 5p +format = %d +</tick> +</ticks> + +karyotype = $blastout.karyotype + +<image> +$dir +file = $out_name.$out_type +radius = $out_size +background = white +angle_offset = -90 +24bit = yes +auto_alpha_colors = yes +auto_alpha_steps = 5 +</image> + +chromosomes_units = 1 +$chr +$chr_order +$chr_radius +$chr2rev + +<links> +ribbon = yes +<link> +show = yes +file = $blastout.links +record_limit = $record_limit +</link> +</links> + +<plots> +<plot> +type = histogram +file = $blastout.his +extend_bin = no +color = white +fill_color = red,orange,green,blue +fill_under = yes +thickness = 0,0,0,0 +min = 0 +r0 = 1.015r +r1 = 1.092r +<axes> +<axis> +color = lgrey +thickness = $axis_thickness +spacing = $axis_spacing +</axis> +</axes> +</plot> +</plots> +"; +close CONF; + +print "(?) running Circos\n"; +`circos -noparanoid -conf $blastout.conf > $blastout.circos.log`; +sleep(1) if $online; +if (-e "$blastout.$out_type") { print "(=) done: $blastout.$out_type $blastout\n"; +} else { print "(!) there was an error running Circos (no output detected), maybe try once more or something different? - exiting\n"; exit; } + + +sub revcomp { + my $seq = reverse $_[0]; + $seq =~ tr/ACGTacgt/TGCAtgca/; + return $seq; +}