comparison variant_effect_predictor/Bio/Graphics/FeatureFile.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 package Bio::Graphics::FeatureFile;
2
3 # $Id: FeatureFile.pm,v 1.20.2.2 2003/08/29 19:30:10 lstein Exp $
4 # This package parses and renders a simple tab-delimited format for features.
5 # It is simpler than GFF, but still has a lot of expressive power.
6 # See __END__ for the file format
7
8 =head1 NAME
9
10 Bio::Graphics::FeatureFile -- A set of Bio::Graphics features, stored in a file
11
12 =head1 SYNOPSIS
13
14 use Bio::Graphics::FeatureFile;
15 my $data = Bio::Graphics::FeatureFile->new(-file => 'features.txt');
16
17
18 # create a new panel and render contents of the file onto it
19 my $panel = $data->new_panel;
20 my $tracks_rendered = $data->render($panel);
21
22 # or do it all in one step
23 my ($tracks_rendered,$panel) = $data->render;
24
25 # for more control, render tracks individually
26 my @feature_types = $data->types;
27 for my $type (@feature_types) {
28 my $features = $data->features($type);
29 my %options = $data->style($type);
30 $panel->add_track($features,%options); # assuming we have a Bio::Graphics::Panel
31 }
32
33 # get individual settings
34 my $est_fg_color = $data->setting(EST => 'fgcolor');
35
36 # or create the FeatureFile by hand
37
38 # add a type
39 $data->add_type(EST => {fgcolor=>'blue',height=>12});
40
41 # add a feature
42 my $feature = Bio::Graphics::Feature->new(
43 # params
44 ); # or some other SeqI
45 $data->add_feature($feature=>'EST');
46
47 =head1 DESCRIPTION
48
49 The Bio::Graphics::FeatureFile module reads and parses files that
50 describe sequence features and their renderings. It accepts both GFF
51 format and a more human-friendly file format described below. Once a
52 FeatureFile object has been initialized, you can interrogate it for
53 its consistuent features and their settings, or render the entire file
54 onto a Bio::Graphics::Panel.
55
56 This moduel is a precursor of Jason Stajich's
57 Bio::Annotation::Collection class, and fulfills a similar function of
58 storing a collection of sequence features. However, it also stores
59 rendering information about the features, and does not currently
60 follow the CollectionI interface.
61
62 =head2 The File Format
63
64 There are two types of entry in the file format: feature entries, and
65 formatting entries. They can occur in any order. See the Appendix
66 for a full example.
67
68 Feature entries can take several forms. At their simplest, they look
69 like this:
70
71 Gene B0511.1 516-11208
72
73 This means that a feature of type "Gene" and name "B0511.1" occupies
74 the range between bases 516 and 11208. A range can be specified
75 equally well using a hyphen, or two dots as in 516..11208. Negative
76 coordinates are allowed, such as -187..1000.
77
78 A discontinuous range ("split location") uses commas to separate the
79 ranges. For example:
80
81 Gene B0511.1 516-619,3185-3294,10946-11208
82
83 Alternatively, the locations can be split by repeating the features
84 type and name on multiple adjacent lines:
85
86 Gene B0511.1 516-619
87 Gene B0511.1 3185-3294
88 Gene B0511.1 10946-11208
89
90 A comment can be added to features by adding a fourth column. These
91 comments will be rendered as under-the-glyph descriptions by those
92 glyphs that honor descriptions:
93
94 Gene B0511.1 516-619,3185-3294,10946-11208 "Putative primase"
95
96 Columns are separated using whitespace, not (necessarily) tabs.
97 Embedded whitespace can be escaped using quote marks or backslashes in
98 the same way as in the shell:
99
100 'Putative Gene' my\ favorite\ gene 516-11208
101
102 Features can be grouped so that they are rendered by the "group" glyph
103 (so far this has only been used to relate 5' and 3' ESTs). To start a
104 group, create a two-column feature entry showing the group type and a
105 name for the group. Follow this with a list of feature entries with a
106 blank type. For example:
107
108 EST yk53c10
109 yk53c10.3 15000-15500,15700-15800
110 yk53c10.5 18892-19154
111
112 This example is declaring that the ESTs named yk53c10.3 and yk53c10.5
113 belong to the same group named yk53c10.
114
115 =cut
116
117 use strict;
118 use Bio::Graphics::Feature;
119 use Bio::DB::GFF::Util::Rearrange;
120 use Carp;
121 use IO::File;
122 use Text::Shellwords;
123
124 # default colors for unconfigured features
125 my @COLORS = qw(cyan blue red yellow green wheat turquoise orange);
126 use constant WIDTH => 600;
127
128 =head2 METHODS
129
130 =over 4
131
132 =item $features = Bio::Graphics::FeatureFile-E<gt>new(@args)
133
134 Create a new Bio::Graphics::FeatureFile using @args to initialize the
135 object. Arguments are -name=E<gt>value pairs:
136
137 Argument Value
138 -------- -----
139
140 -file Read data from a file path or filehandle. Use
141 "-" to read from standard input.
142
143 -text Read data from a text scalar.
144
145 -map_coords Coderef containing a subroutine to use for remapping
146 all coordinates.
147
148 -smart_features Flag indicating that the features created by this
149 module should be made aware of the FeatureFile
150 object by calling their configurator() method.
151
152 -safe Indicates that the contents of this file is trusted.
153 Any option value that begins with the string "sub {"
154 or \&subname will be evaluated as a code reference.
155
156 The -file and -text arguments are mutually exclusive, and -file will
157 supersede the other if both are present.
158
159 -map_coords points to a coderef with the following signature:
160
161 ($newref,[$start1,$end1],[$start2,$end2]....)
162 = coderef($ref,[$start1,$end1],[$start2,$end2]...)
163
164 See the Bio::Graphics::Browser (part of the generic genome browser
165 package) for an illustration of how to use this to do wonderful stuff.
166
167 The -smart_features flag is used by the generic genome browser to
168 provide features with a way to access the link-generation code. See
169 gbrowse for how this works.
170
171 If the file is trusted, and there is an option named "init_code" in
172 the [GENERAL] section of the file, it will be evaluated as perl code
173 immediately after parsing. You can use this to declare global
174 variables and subroutines for use in option values.
175
176 =back
177
178 =cut
179
180 # args array:
181 # -file => parse from a file (- allowed for ARGV)
182 # -text => parse from a text scalar
183 # -map_coords => code ref to do coordinate mapping
184 # called with ($ref,[$start1,$stop1],[$start2,$stop2]...)
185 # returns ($newref,$new_coord1,$new_coord2...)
186
187 sub new {
188 my $class = shift;
189 my %args = @_;
190 my $self = bless {
191 config => {},
192 features => {},
193 seenit => {},
194 types => [],
195 max => undef,
196 min => undef,
197 stat => [],
198 refs => {},
199 safe => undef,
200 },$class;
201 $self->{coordinate_mapper} = $args{-map_coords}
202 if exists $args{-map_coords} && ref($args{-map_coords}) eq 'CODE';
203 $self->{smart_features} = $args{-smart_features} if exists $args{-smart_features};
204 $self->{safe} = $args{-safe} if exists $args{-safe};
205
206 # call with
207 # -file
208 # -text
209 my $fh;
210 if (my $file = $args{-file}) {
211 no strict 'refs';
212 if (defined fileno($file)) {
213 $fh = $file;
214 } elsif ($file eq '-') {
215 $self->parse_argv();
216 } else {
217 $fh = IO::File->new($file) or croak("Can't open $file: $!\n");
218 }
219 $self->parse_file($fh);
220 } elsif (my $text = $args{-text}) {
221 $self->parse_text($text);
222 }
223 close($fh) or warn "Error closing file: $!" if $fh;
224 $self;
225 }
226
227 # render our features onto a panel using configuration data
228 # return the number of tracks inserted
229
230 =over 4
231
232 =item ($rendered,$panel) = $features-E<gt>render([$panel])
233
234 Render features in the data set onto the indicated
235 Bio::Graphics::Panel. If no panel is specified, creates one.
236
237 In a scalar context returns the number of tracks rendered. In a list
238 context, returns a two-element list containing the number of features
239 rendered and the panel. Use this form if you want the panel created
240 for you.
241
242 =back
243
244 =cut
245
246 #"
247
248 sub render {
249 my $self = shift;
250 my $panel = shift;
251 my ($position_to_insert,$options,$max_bump,$max_label) = @_;
252
253 $panel ||= $self->new_panel;
254
255 # count up number of tracks inserted
256 my $tracks = 0;
257 my $color;
258 my %types = map {$_=>1} $self->configured_types;
259
260 my @configured_types = grep {exists $self->{features}{$_}} $self->configured_types;
261 my @unconfigured_types = sort grep {!exists $types{$_}} $self->types;
262
263 my @base_config = $self->style('general');
264
265 my @override = ();
266 if ($options && ref $options eq 'HASH') {
267 @override = %$options;
268 } else {
269 $options ||= 0;
270 if ($options == 1) { # compact
271 push @override,(-bump => 0,-label=>0);
272 } elsif ($options == 2) { #expanded
273 push @override,(-bump=>1);
274 } elsif ($options == 3) { #expand and label
275 push @override,(-bump=>1,-label=>1);
276 } elsif ($options == 4) { #hyperexpand
277 push @override,(-bump => 2);
278 } elsif ($options == 5) { #hyperexpand and label
279 push @override,(-bump => 2,-label=>1);
280 }
281 }
282
283 for my $type (@configured_types,@unconfigured_types) {
284 my $features = $self->features($type);
285 my @auto_bump;
286 push @auto_bump,(-bump => @$features < $max_bump) if defined $max_bump;
287 push @auto_bump,(-label => @$features < $max_label) if defined $max_label;
288
289 my @config = ( -glyph => 'segments', # really generic
290 -bgcolor => $COLORS[$color++ % @COLORS],
291 -label => 1,
292 -key => $type,
293 @auto_bump,
294 @base_config, # global
295 $self->style($type), # feature-specific
296 @override,
297 );
298 if (defined($position_to_insert)) {
299 $panel->insert_track($position_to_insert++,$features,@config);
300 } else {
301 $panel->add_track($features,@config);
302 }
303 $tracks++;
304 }
305 return wantarray ? ($tracks,$panel) : $tracks;
306 }
307
308 sub _stat {
309 my $self = shift;
310 my $fh = shift;
311 $self->{stat} = [stat($fh)];
312 }
313
314 =over 4
315
316 =item $error = $features-E<gt>error([$error])
317
318 Get/set the current error message.
319
320 =back
321
322 =cut
323
324 sub error {
325 my $self = shift;
326 my $d = $self->{error};
327 $self->{error} = shift if @_;
328 $d;
329 }
330
331 =over 4
332
333 =item $smart_features = $features-E<gt>smart_features([$flag]
334
335 Get/set the "smart_features" flag. If this is set, then any features
336 added to the featurefile object will have their configurator() method
337 called using the featurefile object as the argument.
338
339 =back
340
341 =cut
342
343 sub smart_features {
344 my $self = shift;
345 my $d = $self->{smart_features};
346 $self->{smart_features} = shift if @_;
347 $d;
348 }
349
350 sub parse_argv {
351 my $self = shift;
352
353 $self->init_parse;
354 while (<>) {
355 chomp;
356 $self->parse_line($_);
357 }
358 $self->finish_parse;
359 }
360
361 sub parse_file {
362 my $self = shift;
363 my $fh = shift or return;
364 $self->_stat($fh);
365
366 $self->init_parse;
367 while (<$fh>) {
368 chomp;
369 $self->parse_line($_);
370 }
371 $self->finish_parse;
372 }
373
374 sub parse_text {
375 my $self = shift;
376 my $text = shift;
377
378 $self->init_parse;
379 foreach (split /\015?\012|\015\012?/,$text) {
380 $self->parse_line($_);
381 }
382 $self->finish_parse;
383 }
384
385 sub parse_line {
386 my $self = shift;
387 local $_ = shift;
388
389 s/\015//g; # get rid of carriage returns left over by MS-DOS/Windows systems
390
391 return if /^\s*[\#]/;
392
393 if (/^\s+(.+)/ && $self->{current_tag}) { # continuation line
394 my $value = $1;
395 my $cc = $self->{current_config} ||= 'general'; # in case no configuration named
396 $self->{config}{$cc}{$self->{current_tag}} .= ' ' . $value;
397 # respect newlines in code subs
398 $self->{config}{$cc}{$self->{current_tag}} .= "\n"
399 if $self->{config}{$cc}{$self->{current_tag}}=~ /^sub\s*\{/;
400 return;
401 }
402
403 if (/^\s*\[([^\]]+)\]/) { # beginning of a configuration section
404 my $label = $1;
405 my $cc = $label =~ /^(general|default)$/i ? 'general' : $label; # normalize
406 push @{$self->{types}},$cc unless $cc eq 'general';
407 $self->{current_config} = $cc;
408 return;
409 }
410
411 if (/^([\w: -]+?)\s*=\s*(.*)/) { # key value pair within a configuration section
412 my $tag = lc $1;
413 my $cc = $self->{current_config} ||= 'general'; # in case no configuration named
414 my $value = defined $2 ? $2 : '';
415 $self->{config}{$cc}{$tag} = $value;
416 $self->{current_tag} = $tag;
417 return;
418 }
419
420
421 if (/^$/) { # empty line
422 undef $self->{current_tag};
423 return;
424 }
425
426 # parse data lines
427 my @tokens = eval { shellwords($_||'') };
428 unshift @tokens,'' if /^\s+/;
429
430 # close any open group
431 if (length $tokens[0] > 0 && $self->{group}) {
432 push @{$self->{features}{$self->{grouptype}}},$self->{group};
433 undef $self->{group};
434 undef $self->{grouptype};
435 }
436
437 if (@tokens < 3) { # short line; assume a group identifier
438 my $type = shift @tokens;
439 my $name = shift @tokens;
440 $self->{group} = Bio::Graphics::Feature->new(-name => $name,
441 -type => 'group');
442 $self->{grouptype} = $type;
443 return;
444 }
445
446 my($ref,$type,$name,$strand,$bounds,$description,$url);
447
448 if (@tokens >= 8) { # conventional GFF file
449 my ($r,$source,$method,$start,$stop,$score,$s,$phase,@rest) = @tokens;
450 my $group = join ' ',@rest;
451 $type = join(':',$method,$source);
452 $bounds = join '..',$start,$stop;
453 $strand = $s;
454 if ($group) {
455 my ($notes,@notes);
456 (undef,$self->{groupname},undef,undef,$notes) = split_group($group);
457 foreach (@$notes) {
458 if (m!^(http|ftp)://!) { $url = $_ } else { push @notes,$_ }
459 }
460 $description = join '; ',@notes if @notes;
461 }
462 $name ||= $self->{group}->display_id if $self->{group};
463 $ref = $r;
464 }
465
466 elsif ($tokens[2] =~ /^([+-.]|[+-]?[01])$/) { # old simplified version
467 ($type,$name,$strand,$bounds,$description,$url) = @tokens;
468 } else { # new simplified version
469 ($type,$name,$bounds,$description,$url) = @tokens;
470 }
471
472 $type ||= $self->{grouptype} || '';
473 $type =~ s/\s+$//; # get rid of excess whitespace
474
475 # the reference is specified by the GFF reference line first,
476 # the last reference line we saw second,
477 # or the reference line in the "general" section.
478 {
479 local $^W = 0;
480 $ref ||= $self->{config}{$self->{current_config}}{'reference'}
481 || $self->{config}{general}{reference};
482 }
483 $self->{refs}{$ref}++ if defined $ref;
484
485 my @parts = map { [/(-?\d+)(?:-|\.\.)(-?\d+)/]} split /(?:,| )\s*/,$bounds;
486
487 foreach (@parts) { # max and min calculation, sigh...
488 $self->{min} = $_->[0] if !defined $self->{min} || $_->[0] < $self->{min};
489 $self->{max} = $_->[1] if !defined $self->{max} || $_->[1] > $self->{max};
490 }
491
492 if ($self->{coordinate_mapper} && $ref) {
493 ($ref,@parts) = $self->{coordinate_mapper}->($ref,@parts);
494 return unless $ref;
495 }
496
497 $type = '' unless defined $type;
498 $name = '' unless defined $name;
499
500 # attribute handling
501 my %attributes;
502 my $score;
503 if (defined $description && $description =~ /\w+=\w+/) { # attribute line
504 my @attributes = split /;\s*/,$description;
505 foreach (@attributes) {
506 my ($name,$value) = split /=/,$_,2;
507 Bio::Root::Root->throw(qq("$_" is not a valid attribute=value pair)) unless defined $value;
508 _unescape($name);
509 my @values = split /,/,$value;
510 _unescape(@values);
511 if ($name =~ /^(note|description)/) {
512 $description = "@values";
513 } elsif ($name eq 'url') {
514 $url = $value;
515 } elsif ($name eq 'score') {
516 $score = $value;
517 } else {
518 push @{$attributes{$name}},@values;
519 }
520 }
521 }
522
523 # either create a new feature or add a segment to it
524 if (my $feature = $self->{seenit}{$type,$name}) {
525
526 # create a new first part
527 if (!$feature->segments) {
528 $feature->add_segment(Bio::Graphics::Feature->new(-type => $feature->type,
529 -strand => $feature->strand,
530 -start => $feature->start,
531 -end => $feature->end));
532 }
533 $feature->add_segment(@parts);
534 }
535
536 else {
537 my @coordinates = @parts > 1 ? (-segments => \@parts) : (-start=>$parts[0][0],-end=>$parts[0][1]);
538 $feature = $self->{seenit}{$type,$name} =
539 Bio::Graphics::Feature->new(-name => $name,
540 -type => $type,
541 $strand ? (-strand => make_strand($strand)) : (),
542 defined $score ? (-score=>$score) : (),
543 -desc => $description,
544 -ref => $ref,
545 -attributes => \%attributes,
546 defined($url) ? (-url => $url) : (),
547 @coordinates,
548 );
549 $feature->configurator($self) if $self->smart_features;
550 if ($self->{group}) {
551 $self->{group}->add_segment($feature);
552 } else {
553 push @{$self->{features}{$type}},$feature; # for speed; should use add_feature() instead
554 }
555 }
556 }
557
558 sub _unescape {
559 foreach (@_) {
560 tr/+/ /; # pluses become spaces
561 s/%([0-9a-fA-F]{2})/chr hex($1)/g;
562 }
563 @_;
564 }
565
566 =over 4
567
568 =item $features-E<gt>add_feature($feature [=E<gt>$type])
569
570 Add a new Bio::FeatureI object to the set. If $type is specified, the
571 object will be added with the indicated type. Otherwise, the
572 feature's primary_tag() method will be invoked to get the type.
573
574 =back
575
576 =cut
577
578 # add a feature of given type to our list
579 # we use the primary_tag() method
580 sub add_feature {
581 my $self = shift;
582 my ($feature,$type) = @_;
583 $type = $feature->primary_tag unless defined $type;
584 push @{$self->{features}{$type}},$feature;
585 }
586
587
588 =over 4
589
590 =item $features-E<gt>add_type($type=E<gt>$hashref)
591
592 Add a new feature type to the set. The type is a string, such as
593 "EST". The hashref is a set of key=E<gt>value pairs indicating options to
594 set on the type. Example:
595
596 $features->add_type(EST => { glyph => 'generic', fgcolor => 'blue'})
597
598 When a feature of type "EST" is rendered, it will use the generic
599 glyph and have a foreground color of blue.
600
601 =back
602
603 =cut
604
605 # Add a type to the list. Hash values are used for key/value pairs
606 # in the configuration. Call as add_type($type,$configuration) where
607 # $configuration is a hashref.
608 sub add_type {
609 my $self = shift;
610 my ($type,$type_configuration) = @_;
611 my $cc = $type =~ /^(general|default)$/i ? 'general' : $type; # normalize
612 push @{$self->{types}},$cc unless $cc eq 'general' or $self->{config}{$cc};
613 if (defined $type_configuration) {
614 for my $tag (keys %$type_configuration) {
615 $self->{config}{$cc}{lc $tag} = $type_configuration->{$tag};
616 }
617 }
618 }
619
620
621
622 =over 4
623
624 =item $features-E<gt>set($type,$tag,$value)
625
626 Change an individual option for a particular type. For example, this
627 will change the foreground color of EST features to my favorite color:
628
629 $features->set('EST',fgcolor=>'chartreuse')
630
631 =back
632
633 =cut
634
635 # change configuration of a type. Call as set($type,$tag,$value)
636 # $type will be added if not already there.
637 sub set {
638 my $self = shift;
639 croak("Usage: \$featurefile->set(\$type,\$tag,\$value\n")
640 unless @_ == 3;
641 my ($type,$tag,$value) = @_;
642 unless ($self->{config}{$type}) {
643 return $self->add_type($type,{$tag=>$value});
644 } else {
645 $self->{config}{$type}{lc $tag} = $value;
646 }
647 }
648
649 # break circular references
650 sub destroy {
651 my $self = shift;
652 delete $self->{features};
653 }
654
655 sub DESTROY { shift->destroy(@_) }
656
657 =over 4
658
659 =item $value = $features-E<gt>setting($stanza =E<gt> $option)
660
661 In the two-element form, the setting() method returns the value of an
662 option in the configuration stanza indicated by $stanza. For example:
663
664 $value = $features->setting(general => 'height')
665
666 will return the value of the "height" option in the [general] stanza.
667
668 Call with one element to retrieve all the option names in a stanza:
669
670 @options = $features->setting('general');
671
672 Call with no elements to retrieve all stanza names:
673
674 @stanzas = $features->setting;
675
676 =back
677
678 =cut
679
680 sub setting {
681 my $self = shift;
682 if ($self->safe) {
683 $self->code_setting(@_);
684 } else {
685 $self->_setting(@_);
686 }
687 }
688
689 # return configuration information
690 # arguments are ($type) => returns tags for type
691 # ($type=>$tag) => returns values of tag on type
692 sub _setting {
693 my $self = shift;
694 my $config = $self->{config} or return;
695 return keys %{$config} unless @_;
696 return keys %{$config->{$_[0]}} if @_ == 1;
697 return $config->{$_[0]}{$_[1]} if @_ > 1;
698 }
699
700
701 =over 4
702
703 =item $value = $features-E<gt>code_setting($stanza=E<gt>$option);
704
705 This works like setting() except that it is also able to evaluate code
706 references. These are options whose values begin with the characters
707 "sub {". In this case the value will be passed to an eval() and the
708 resulting codereference returned. Use this with care!
709
710 =back
711
712 =cut
713
714 sub code_setting {
715 my $self = shift;
716 my $section = shift;
717 my $option = shift;
718
719 my $setting = $self->_setting($section=>$option);
720 return unless defined $setting;
721 return $setting if ref($setting) eq 'CODE';
722 if ($setting =~ /^\\&(\w+)/) { # coderef in string form
723 my $subroutine_name = $1;
724 my $package = $self->base2package;
725 my $codestring = "\\&${package}\:\:${subroutine_name}";
726 my $coderef = eval $codestring;
727 warn $@ if $@;
728 $self->set($section,$option,$coderef);
729 return $coderef;
730 }
731 elsif ($setting =~ /^sub\s*\{/) {
732 my $coderef = eval $setting;
733 warn $@ if $@;
734 $self->set($section,$option,$coderef);
735 return $coderef;
736 } else {
737 return $setting;
738 }
739 }
740
741 =over 4
742
743 =item $flag = $features-E<gt>safe([$flag]);
744
745 This gets or sets and "safe" flag. If the safe flag is set, then
746 calls to setting() will invoke code_setting(), allowing values that
747 begin with the string "sub {" to be interpreted as anonymous
748 subroutines. This is a potential security risk when used with
749 untrusted files of features, so use it with care.
750
751 =back
752
753 =cut
754
755 sub safe {
756 my $self = shift;
757 my $d = $self->{safe};
758 $self->{safe} = shift if @_;
759 $self->evaluate_coderefs if $self->{safe} && !$d;
760 $d;
761 }
762
763
764 =over 4
765
766 =item @args = $features-E<gt>style($type)
767
768 Given a feature type, returns a list of track configuration arguments
769 suitable for suitable for passing to the
770 Bio::Graphics::Panel-E<gt>add_track() method.
771
772 =back
773
774 =cut
775
776 # turn configuration into a set of -name=>value pairs suitable for add_track()
777 sub style {
778 my $self = shift;
779 my $type = shift;
780
781 my $config = $self->{config} or return;
782 my $hashref = $config->{$type} or return;
783
784 return map {("-$_" => $hashref->{$_})} keys %$hashref;
785 }
786
787
788 =over 4
789
790 =item $glyph = $features-E<gt>glyph($type);
791
792 Return the name of the glyph corresponding to the given type (same as
793 $features-E<gt>setting($type=E<gt>'glyph')).
794
795 =back
796
797 =cut
798
799 # retrieve just the glyph part of the configuration
800 sub glyph {
801 my $self = shift;
802 my $type = shift;
803 my $config = $self->{config} or return;
804 my $hashref = $config->{$type} or return;
805 return $hashref->{glyph};
806 }
807
808
809 =over 4
810
811 =item @types = $features-E<gt>configured_types()
812
813 Return a list of all the feature types currently known to the feature
814 file set. Roughly equivalent to:
815
816 @types = grep {$_ ne 'general'} $features->setting;
817
818 =back
819
820 =cut
821
822 # return list of configured types, in proper order
823 sub configured_types {
824 my $self = shift;
825 my $types = $self->{types} or return;
826 return @{$types};
827 }
828
829 =over 4
830
831 =item @types = $features-E<gt>types()
832
833 This is similar to the previous method, but will return *all* feature
834 types, including those that are not configured with a stanza.
835
836 =back
837
838 =cut
839
840 sub types {
841 my $self = shift;
842 my $features = $self->{features} or return;
843 return keys %{$features};
844 }
845
846
847 =over 4
848
849 =item $features = $features-E<gt>features($type)
850
851 Return a list of all the feature types of type "$type". If the
852 featurefile object was created by parsing a file or text scalar, then
853 the features will be of type Bio::Graphics::Feature (which follow the
854 Bio::FeatureI interface). Otherwise the list will contain objects of
855 whatever type you added with calls to add_feature().
856
857 Two APIs:
858
859 1) original API:
860
861 # Reference to an array of all features of type "$type"
862 $features = $features-E<gt>features($type)
863
864 # Reference to an array of all features of all types
865 $features = $features-E<gt>features()
866
867 # A list when called in a list context
868 @features = $features-E<gt>features()
869
870 2) Bio::Das::SegmentI API:
871
872 @features = $features-E<gt>features(-type=>['list','of','types']);
873
874 # variants
875 $features = $features-E<gt>features(-type=>['list','of','types']);
876 $features = $features-E<gt>features(-type=>'a type');
877 $iterator = $features-E<gt>features(-type=>'a type',-iterator=>1);
878
879 =back
880
881 =cut
882
883 # return features
884 sub features {
885 my $self = shift;
886 my ($types,$iterator,@rest) = $_[0]=~/^-/ ? rearrange([['TYPE','TYPES']],@_) : (\@_);
887 $types = [$types] if $types && !ref($types);
888 my @types = ($types && @$types) ? @$types : $self->types;
889 my @features = map {@{$self->{features}{$_}}} @types;
890 if ($iterator) {
891 require Bio::Graphics::FeatureFile::Iterator;
892 return Bio::Graphics::FeatureFile::Iterator->new(\@features);
893 }
894 return wantarray ? @features : \@features;
895 }
896
897 =over 4
898
899 =item @features = $features-E<gt>features($type)
900
901 Return a list of all the feature types of type "$type". If the
902 featurefile object was created by parsing a file or text scalar, then
903 the features will be of type Bio::Graphics::Feature (which follow the
904 Bio::FeatureI interface). Otherwise the list will contain objects of
905 whatever type you added with calls to add_feature().
906
907 =back
908
909 =cut
910
911 sub make_strand {
912 local $^W = 0;
913 return +1 if $_[0] =~ /^\+/ || $_[0] > 0;
914 return -1 if $_[0] =~ /^\-/ || $_[0] < 0;
915 return 0;
916 }
917
918 =head2 get_seq_stream
919
920 Title : get_seq_stream
921 Usage : $stream = $s->get_seq_stream(@args)
922 Function: get a stream of features that overlap this segment
923 Returns : a Bio::SeqIO::Stream-compliant stream
924 Args : see below
925 Status : Public
926
927 This is the same as feature_stream(), and is provided for Bioperl
928 compatibility. Use like this:
929
930 $stream = $s->get_seq_stream('exon');
931 while (my $exon = $stream->next_seq) {
932 print $exon->start,"\n";
933 }
934
935 =cut
936
937 sub get_seq_stream {
938 my $self = shift;
939 local $^W = 0;
940 my @args = $_[0] =~ /^-/ ? (@_,-iterator=>1) : (-types=>\@_,-iterator=>1);
941 $self->features(@args);
942 }
943
944 =head2 get_feature_stream(), top_SeqFeatures(), all_SeqFeatures()
945
946 Provided for compatibility with older BioPerl and/or Bio::DB::GFF
947 APIs.
948
949 =cut
950
951 *get_feature_stream = \&get_seq_stream;
952 *top_SeqFeatures = *all_SeqFeatures = \&features;
953
954
955 =over 4
956
957 =item @refs = $features-E<gt>refs
958
959 Return the list of reference sequences referred to by this data file.
960
961 =back
962
963 =cut
964
965 sub refs {
966 my $self = shift;
967 my $refs = $self->{refs} or return;
968 keys %$refs;
969 }
970
971 =over 4
972
973 =item $min = $features-E<gt>min
974
975 Return the minimum coordinate of the leftmost feature in the data set.
976
977 =back
978
979 =cut
980
981 sub min { shift->{min} }
982
983 =over 4
984
985 =item $max = $features-E<gt>max
986
987 Return the maximum coordinate of the rightmost feature in the data set.
988
989 =back
990
991 =cut
992
993 sub max { shift->{max} }
994
995 sub init_parse {
996 my $s = shift;
997
998 $s->{seenit} = {};
999 $s->{max} = $s->{min} = undef;
1000 $s->{types} = [];
1001 $s->{features} = {};
1002 $s->{config} = {}
1003 }
1004
1005 sub finish_parse {
1006 my $s = shift;
1007 $s->evaluate_coderefs if $s->safe;
1008 $s->{seenit} = {};
1009 }
1010
1011 sub evaluate_coderefs {
1012 my $self = shift;
1013 $self->initialize_code();
1014 for my $s ($self->_setting) {
1015 for my $o ($self->_setting($s)) {
1016 $self->code_setting($s,$o);
1017 }
1018 }
1019 }
1020
1021 sub initialize_code {
1022 my $self = shift;
1023 my $package = $self->base2package;
1024 my $init_code = $self->_setting(general => 'init_code') or return;
1025 my $code = "package $package; $init_code; 1;";
1026 eval $code;
1027 warn $@ if $@;
1028 }
1029
1030 sub base2package {
1031 my $self = shift;
1032 (my $package = overload::StrVal($self)) =~ s/[^a-z0-9A-Z_]/_/g;
1033 $package =~ s/^[^a-zA-Z_]/_/g;
1034 $package;
1035 }
1036
1037 sub split_group {
1038 my $group = shift;
1039
1040 $group =~ s/\\;/$;/g; # protect embedded semicolons in the group
1041 $group =~ s/( \"[^\"]*);([^\"]*\")/$1$;$2/g;
1042 my @groups = split(/\s*;\s*/,$group);
1043 foreach (@groups) { s/$;/;/g }
1044
1045 my ($gclass,$gname,$tstart,$tstop,@notes);
1046
1047 foreach (@groups) {
1048
1049 my ($tag,$value) = /^(\S+)\s*(.*)/;
1050 $value =~ s/\\t/\t/g;
1051 $value =~ s/\\r/\r/g;
1052 $value =~ s/^"//;
1053 $value =~ s/"$//;
1054
1055 # if the tag is "Note", then we add this to the
1056 # notes array
1057 if ($tag eq 'Note') { # just a note, not a group!
1058 push @notes,$value;
1059 }
1060
1061 # if the tag eq 'Target' then the class name is embedded in the ID
1062 # (the GFF format is obviously screwed up here)
1063 elsif ($tag eq 'Target' && $value =~ /([^:\"]+):([^\"]+)/) {
1064 ($gclass,$gname) = ($1,$2);
1065 ($tstart,$tstop) = /(\d+) (\d+)/;
1066 }
1067
1068 elsif (!$value) {
1069 push @notes,$tag; # e.g. "Confirmed_by_EST"
1070 }
1071
1072 # otherwise, the tag and value correspond to the
1073 # group class and name
1074 else {
1075 ($gclass,$gname) = ($tag,$value);
1076 }
1077 }
1078
1079 return ($gclass,$gname,$tstart,$tstop,\@notes);
1080 }
1081
1082 # create a panel if needed
1083 sub new_panel {
1084 my $self = shift;
1085
1086 require Bio::Graphics::Panel;
1087
1088 # general configuration of the image here
1089 my $width = $self->setting(general => 'pixels')
1090 || $self->setting(general => 'width')
1091 || WIDTH;
1092
1093 my ($start,$stop);
1094 my $range_expr = '(-?\d+)(?:-|\.\.)(-?\d+)';
1095
1096 if (my $bases = $self->setting(general => 'bases')) {
1097 ($start,$stop) = $bases =~ /([\d-]+)(?:-|\.\.)([\d-]+)/;
1098 }
1099
1100 if (!defined $start || !defined $stop) {
1101 $start = $self->min unless defined $start;
1102 $stop = $self->max unless defined $stop;
1103 }
1104
1105 my $new_segment = Bio::Graphics::Feature->new(-start=>$start,-stop=>$stop);
1106 my $panel = Bio::Graphics::Panel->new(-segment => $new_segment,
1107 -width => $width,
1108 -key_style => 'between');
1109 $panel;
1110 }
1111
1112 =over 4
1113
1114 =item $mtime = $features-E<gt>mtime
1115
1116 =item $atime = $features-E<gt>atime
1117
1118 =item $ctime = $features-E<gt>ctime
1119
1120 =item $size = $features-E<gt>size
1121
1122 Returns stat() information about the data file, for featurefile
1123 objects created using the -file option. Size is in bytes. mtime,
1124 atime, and ctime are in seconds since the epoch.
1125
1126 =back
1127
1128 =cut
1129
1130 sub mtime {
1131 my $self = shift;
1132 my $d = $self->{m_time} || $self->{stat}->[9];
1133 $self->{m_time} = shift if @_;
1134 $d;
1135 }
1136 sub atime { shift->{stat}->[8]; }
1137 sub ctime { shift->{stat}->[10]; }
1138 sub size { shift->{stat}->[7]; }
1139
1140 =over 4
1141
1142 =item $label = $features-E<gt>feature2label($feature)
1143
1144 Given a feature, determines the configuration stanza that bests
1145 describes it. Uses the feature's type() method if it has it (DasI
1146 interface) or its primary_tag() method otherwise.
1147
1148 =back
1149
1150 =cut
1151
1152 sub feature2label {
1153 my $self = shift;
1154 my $feature = shift;
1155 my $type = eval {$feature->type} || $feature->primary_tag or return;
1156 (my $basetype = $type) =~ s/:.+$//;
1157 my @labels = $self->type2label($type);
1158 @labels = $self->type2label($basetype) unless @labels;
1159 @labels = ($type) unless @labels;;
1160 wantarray ? @labels : $labels[0];
1161 }
1162
1163 =over 4
1164
1165 =item $link = $features-E<gt>make_link($feature)
1166
1167 Given a feature, tries to generate a URL to link out from it. This
1168 uses the 'link' option, if one is present. This method is a
1169 convenience for the generic genome browser.
1170
1171 =back
1172
1173 =cut
1174
1175 sub make_link {
1176 my $self = shift;
1177 my $feature = shift;
1178 for my $label ($self->feature2label($feature)) {
1179 my $link = $self->setting($label,'link');
1180 $link = $self->setting(general=>'link') unless defined $link;
1181 next unless $link;
1182 return $self->link_pattern($link,$feature);
1183 }
1184 return;
1185 }
1186
1187 sub link_pattern {
1188 my $self = shift;
1189 my ($pattern,$feature,$panel) = @_;
1190 require CGI unless defined &CGI::escape;
1191 my $n;
1192 $pattern =~ s/\$(\w+)/
1193 CGI::escape(
1194 $1 eq 'ref' ? ($n = $feature->location->seq_id) && "$n"
1195 : $1 eq 'name' ? ($n = $feature->display_name) && "$n" # workaround broken CGI.pm
1196 : $1 eq 'class' ? eval {$feature->class} || ''
1197 : $1 eq 'type' ? eval {$feature->method} || $feature->primary_tag
1198 : $1 eq 'method' ? eval {$feature->method} || $feature->primary_tag
1199 : $1 eq 'source' ? eval {$feature->source} || $feature->source_tag
1200 : $1 eq 'start' ? $feature->start
1201 : $1 eq 'end' ? $feature->end
1202 : $1 eq 'stop' ? $feature->end
1203 : $1 eq 'segstart' ? $panel->start
1204 : $1 eq 'segend' ? $panel->end
1205 : $1 eq 'description' ? eval {join '',$feature->notes} || ''
1206 : $1
1207 )
1208 /exg;
1209 return $pattern;
1210 }
1211
1212 # given a feature type, return its label(s)
1213 sub type2label {
1214 my $self = shift;
1215 my $type = shift;
1216 $self->{_type2label} ||= $self->invert_types;
1217 my @labels = keys %{$self->{_type2label}{$type}};
1218 wantarray ? @labels : $labels[0]
1219 }
1220
1221 sub invert_types {
1222 my $self = shift;
1223 my $config = $self->{config} or return;
1224 my %inverted;
1225 for my $label (keys %{$config}) {
1226 my $feature = $config->{$label}{feature} or next;
1227 foreach (shellwords($feature||'')) {
1228 $inverted{$_}{$label}++;
1229 }
1230 }
1231 \%inverted;
1232 }
1233
1234 =over 4
1235
1236 =item $citation = $features-E<gt>citation($feature)
1237
1238 Given a feature, tries to generate a citation for it, using the
1239 "citation" option if one is present. This method is a convenience for
1240 the generic genome browser.
1241
1242 =back
1243
1244 =cut
1245
1246 # This routine returns the "citation" field. It is here in order to simplify the logic
1247 # a bit in the generic browser
1248 sub citation {
1249 my $self = shift;
1250 my $feature = shift || 'general';
1251 return $self->setting($feature=>'citation');
1252 }
1253
1254 =over 4
1255
1256 =item $name = $features-E<gt>name([$feature])
1257
1258 Get/set the name of this feature set. This is a convenience method
1259 useful for keeping track of multiple feature sets.
1260
1261 =back
1262
1263 =cut
1264
1265 # give this feature file a nickname
1266 sub name {
1267 my $self = shift;
1268 my $d = $self->{name};
1269 $self->{name} = shift if @_;
1270 $d;
1271 }
1272
1273 1;
1274
1275 __END__
1276
1277 =head1 Appendix -- Sample Feature File
1278
1279 # file begins
1280 [general]
1281 pixels = 1024
1282 bases = 1-20000
1283 reference = Contig41
1284 height = 12
1285
1286 [Cosmid]
1287 glyph = segments
1288 fgcolor = blue
1289 key = C. elegans conserved regions
1290
1291 [EST]
1292 glyph = segments
1293 bgcolor= yellow
1294 connector = dashed
1295 height = 5;
1296
1297 [FGENESH]
1298 glyph = transcript2
1299 bgcolor = green
1300 description = 1
1301
1302 Cosmid B0511 516-619
1303 Cosmid B0511 3185-3294
1304 Cosmid B0511 10946-11208
1305 Cosmid B0511 13126-13511
1306 Cosmid B0511 11394-11539
1307 EST yk260e10.5 15569-15724
1308 EST yk672a12.5 537-618,3187-3294
1309 EST yk595e6.5 552-618
1310 EST yk595e6.5 3187-3294
1311 EST yk846e07.3 11015-11208
1312 EST yk53c10
1313 yk53c10.3 15000-15500,15700-15800
1314 yk53c10.5 18892-19154
1315 EST yk53c10.5 16032-16105
1316 SwissProt PECANEX 13153-13656 Swedish fish
1317 FGENESH Predicted gene 1 1-205,518-616,661-735,3187-3365,3436-3846 Pfam domain
1318 FGENESH Predicted gene 2 5513-6497,7968-8136,8278-8383,8651-8839,9462-9515,10032-10705,10949-11340,11387-11524,11765-12067,12876-13577,13882-14121,14169-14535,15006-15209,15259-15462,15513-15753,15853-16219 Mysterious
1319 FGENESH Predicted gene 3 16626-17396,17451-17597
1320 FGENESH Predicted gene 4 18459-18722,18882-19176,19221-19513,19572-19835 Transmembrane protein
1321 # file ends
1322
1323 =head1 SEE ALSO
1324
1325 L<Bio::Graphics::Panel>,
1326 L<Bio::Graphics::Glyph>,
1327 L<Bio::Graphics::Feature>,
1328 L<Bio::Graphics::FeatureFile>
1329
1330 =head1 AUTHOR
1331
1332 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
1333
1334 Copyright (c) 2001 Cold Spring Harbor Laboratory
1335
1336 This library is free software; you can redistribute it and/or modify
1337 it under the same terms as Perl itself. See DISCLAIMER.txt for
1338 disclaimers of warranty.
1339
1340 =cut
1341
1342
1343