0
|
1 # $Id: ClusterIO.pm,v 1.11.2.1 2003/01/21 01:11:17 jason Exp $
|
|
2 #
|
|
3 # BioPerl module for Bio::ClusterIO.pm
|
|
4 #
|
|
5 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz>
|
|
6 #
|
|
7 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
|
|
8 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
|
|
9 # http://anatomy.otago.ac.nz/meg
|
|
10 #
|
|
11 # You may distribute this module under the same terms as perl itself
|
|
12 #
|
|
13 # _history
|
|
14 #
|
|
15 # May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm
|
|
16 # by Andrew Macgregor
|
|
17 #
|
|
18 # April 17, 2002 - Initial implementation by Andrew Macgregor
|
|
19 # POD documentation - main docs before the code
|
|
20
|
|
21 =head1 NAME
|
|
22
|
|
23 Bio::ClusterIO - Handler for Cluster Formats
|
|
24
|
|
25 =head1 SYNOPSIS
|
|
26
|
|
27 #NB: This example is unigene specific
|
|
28
|
|
29 use Bio::ClusterIO;
|
|
30
|
|
31 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
|
|
32 '-format' => "unigene");
|
|
33 # note: we quote -format to keep older perl's from complaining.
|
|
34
|
|
35 while ( my $in = $stream->next_cluster() ) {
|
|
36 print $in->unigene_id() . "\n";
|
|
37 while ( my $sequence = $in->next_seq() ) {
|
|
38 print $sequence->accession_number() . "\n";
|
|
39 }
|
|
40 }
|
|
41 # Parsing errors are printed to STDERR.
|
|
42
|
|
43 =head1 DESCRIPTION
|
|
44
|
|
45 The ClusterIO module works with the ClusterIO format module to read
|
|
46 various cluster formats such as NCBI UniGene.
|
|
47
|
|
48
|
|
49 =head1 CONSTRUCTORS
|
|
50
|
|
51 =head2 Bio::ClusterIO-E<gt>new()
|
|
52
|
|
53 $str = Bio::ClusterIO->new(-file => 'filename',
|
|
54 -format=>$format);
|
|
55
|
|
56 The new() class method constructs a new Bio::ClusterIO object. The
|
|
57 returned object can be used to retrieve or print cluster
|
|
58 objects. new() accepts the following parameters:
|
|
59
|
|
60 =over 4
|
|
61
|
|
62 =item -file
|
|
63
|
|
64 A file path to be opened for reading.
|
|
65
|
|
66 =item -format
|
|
67
|
|
68 Specify the format of the file. Supported formats include:
|
|
69
|
|
70 unigene *.data UniGene build files.
|
|
71 dbsnp *.xml dbSNP XML files
|
|
72
|
|
73 If no format is specified and a filename is given, then the module
|
|
74 will attempt to deduce it from the filename. If this is unsuccessful,
|
|
75 the main UniGene build format is assumed.
|
|
76
|
|
77 The format name is case insensitive. 'UNIGENE', 'UniGene' and
|
|
78 'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP
|
|
79
|
|
80 =back
|
|
81
|
|
82 =head1 OBJECT METHODS
|
|
83
|
|
84 See below for more detailed summaries. The main methods are:
|
|
85
|
|
86 =head2 $cluster = $str-E<gt>next_cluster()
|
|
87
|
|
88 Fetch the next cluster from the stream.
|
|
89
|
|
90
|
|
91 =head2 TIEHANDLE(), READLINE(), PRINT()
|
|
92
|
|
93 These I've left in here because they were in the SeqIO
|
|
94 module. Feedback appreciated. There they provide the tie interface.
|
|
95 See L<perltie> for more details.
|
|
96
|
|
97 =head1 FEEDBACK
|
|
98
|
|
99 =head2 Mailing Lists
|
|
100
|
|
101 User feedback is an integral part of the evolution of this
|
|
102 and other Bioperl modules. Send your comments and suggestions preferably
|
|
103 to one of the Bioperl mailing lists.
|
|
104 Your participation is much appreciated.
|
|
105
|
|
106 bioperl-l@bioperl.org - General discussion
|
|
107 http://bioperl.org/MailList.shtml - About the mailing lists
|
|
108
|
|
109 =head2 Reporting Bugs
|
|
110
|
|
111 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
112 the bugs and their resolution.
|
|
113 Bug reports can be submitted via email or the web:
|
|
114
|
|
115 bioperl-bugs@bioperl.org
|
|
116 http://bugzilla.bioperl.org/
|
|
117
|
|
118 =head1 AUTHOR - Andrew Macgregor
|
|
119
|
|
120 Email andrew@anatomy.otago.ac.nz
|
|
121
|
|
122 =head1 APPENDIX
|
|
123
|
|
124 The rest of the documentation details each of the object
|
|
125 methods. Internal methods are usually preceded with a _
|
|
126
|
|
127 =cut
|
|
128
|
|
129 #'
|
|
130 # Let the code begin...
|
|
131
|
|
132 package Bio::ClusterIO;
|
|
133
|
|
134 use strict;
|
|
135 use vars qw(@ISA);
|
|
136
|
|
137 use Bio::Root::Root;
|
|
138 use Bio::Root::IO;
|
|
139
|
|
140 @ISA = qw(Bio::Root::Root Bio::Root::IO);
|
|
141
|
|
142
|
|
143
|
|
144 =head2 new
|
|
145
|
|
146 Title : new
|
|
147 Usage : Bio::ClusterIO->new(-file => $filename, -format => 'format')
|
|
148 Function: Returns a new cluster stream
|
|
149 Returns : A Bio::ClusterIO::Handler initialised with the appropriate format
|
|
150 Args : -file => $filename
|
|
151 -format => format
|
|
152
|
|
153 =cut
|
|
154
|
|
155
|
|
156 my $entry = 0;
|
|
157
|
|
158 sub new {
|
|
159 my ($caller,@args) = @_;
|
|
160 my $class = ref($caller) || $caller;
|
|
161
|
|
162 # or do we want to call SUPER on an object if $caller is an
|
|
163 # object?
|
|
164 if( $class =~ /Bio::ClusterIO::(\S+)/ ) {
|
|
165 my ($self) = $class->SUPER::new(@args);
|
|
166 $self->_initialize(@args);
|
|
167 return $self;
|
|
168 } else {
|
|
169
|
|
170 my %param = @args;
|
|
171 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
|
|
172 my $format = $param{'-format'} ||
|
|
173 $class->_guess_format( $param{-file} || $ARGV[0] );
|
|
174 $format = "\L$format"; # normalize capitalization to lower case
|
|
175
|
|
176 return undef unless( $class->_load_format_module($format) );
|
|
177 return "Bio::ClusterIO::$format"->new(@args);
|
|
178 }
|
|
179 }
|
|
180
|
|
181
|
|
182 # _initialize is chained for all ClusterIO classes
|
|
183
|
|
184 sub _initialize {
|
|
185 my($self, @args) = @_;
|
|
186 # initialize the IO part
|
|
187 $self->_initialize_io(@args);
|
|
188 }
|
|
189
|
|
190 =head2 next_cluster
|
|
191
|
|
192 Title : next_cluster
|
|
193 Usage : $cluster = $stream->next_cluster()
|
|
194 Function: Reads the next cluster object from the stream and returns it.
|
|
195 Returns : a L<Bio::ClusterI> compliant object
|
|
196 Args : none
|
|
197
|
|
198
|
|
199 =cut
|
|
200
|
|
201 sub next_cluster {
|
|
202 my ($self, $seq) = @_;
|
|
203 $self->throw("Sorry, you cannot read from a generic Bio::ClusterIO object.");
|
|
204 }
|
|
205
|
|
206 =head2 cluster_factory
|
|
207
|
|
208 Title : cluster_factory
|
|
209 Usage : $obj->cluster_factory($newval)
|
|
210 Function: Get/set the object factory to use for creating the cluster
|
|
211 objects.
|
|
212 Example :
|
|
213 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
|
|
214 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
|
|
215 compliant object or undef, optional)
|
|
216
|
|
217
|
|
218 =cut
|
|
219
|
|
220 sub cluster_factory{
|
|
221 my $self = shift;
|
|
222
|
|
223 return $self->{'cluster_factory'} = shift if @_;
|
|
224 return $self->{'cluster_factory'};
|
|
225 }
|
|
226
|
|
227 =head2 object_factory
|
|
228
|
|
229 Title : object_factory
|
|
230 Usage : $obj->object_factory($newval)
|
|
231 Function: This is an alias to cluster_factory with a more generic name.
|
|
232 Example :
|
|
233 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
|
|
234 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
|
|
235 compliant object or undef, optional)
|
|
236
|
|
237
|
|
238 =cut
|
|
239
|
|
240 sub object_factory{
|
|
241 return shift->cluster_factory(@_);
|
|
242 }
|
|
243
|
|
244 =head2 _load_format_module
|
|
245
|
|
246 Title : _load_format_module
|
|
247 Usage : *INTERNAL ClusterIO stuff*
|
|
248 Function: Loads up (like use) a module at run time on demand
|
|
249 Example :
|
|
250 Returns :
|
|
251 Args :
|
|
252
|
|
253 =cut
|
|
254
|
|
255 sub _load_format_module {
|
|
256 my ($self,$format) = @_;
|
|
257 my $module = "Bio::ClusterIO::" . $format;
|
|
258 my $ok;
|
|
259
|
|
260 eval {
|
|
261 $ok = $self->_load_module($module);
|
|
262 };
|
|
263 if ( $@ ) {
|
|
264 print STDERR <<END;
|
|
265 $self: could not load $format - for more details on supported formats please see the ClusterIO docs
|
|
266 Exception $@
|
|
267 END
|
|
268 ;
|
|
269 }
|
|
270 return $ok;
|
|
271 }
|
|
272
|
|
273 =head2 _guess_format
|
|
274
|
|
275 Title : _guess_format
|
|
276 Usage : $obj->_guess_format($filename)
|
|
277 Function: guess format based on file suffix
|
|
278 Example :
|
|
279 Returns : guessed format of filename (lower case)
|
|
280 Args :
|
|
281 Notes : formats that _filehandle() will guess include unigene and dbsnp
|
|
282
|
|
283 =cut
|
|
284
|
|
285 sub _guess_format {
|
|
286 my $class = shift;
|
|
287 return unless $_ = shift;
|
|
288 return 'unigene' if /\.(data)$/i;
|
|
289 return 'dbsnp' if /\.(xml)$/i;
|
|
290 }
|
|
291
|
|
292 sub DESTROY {
|
|
293 my $self = shift;
|
|
294
|
|
295 $self->close();
|
|
296 }
|
|
297
|
|
298 # I need some direction on these!! The module works so I haven't fiddled with them!
|
|
299
|
|
300 sub TIEHANDLE {
|
|
301 my ($class,$val) = @_;
|
|
302 return bless {'seqio' => $val}, $class;
|
|
303 }
|
|
304
|
|
305 sub READLINE {
|
|
306 my $self = shift;
|
|
307 return $self->{'seqio'}->next_seq() unless wantarray;
|
|
308 my (@list, $obj);
|
|
309 push @list, $obj while $obj = $self->{'seqio'}->next_seq();
|
|
310 return @list;
|
|
311 }
|
|
312
|
|
313 sub PRINT {
|
|
314 my $self = shift;
|
|
315 $self->{'seqio'}->write_seq(@_);
|
|
316 }
|
|
317
|
|
318 1;
|
|
319
|