comparison variant_effect_predictor/Bio/ClusterIO.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: ClusterIO.pm,v 1.11.2.1 2003/01/21 01:11:17 jason Exp $
2 #
3 # BioPerl module for Bio::ClusterIO.pm
4 #
5 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz>
6 #
7 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
8 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
9 # http://anatomy.otago.ac.nz/meg
10 #
11 # You may distribute this module under the same terms as perl itself
12 #
13 # _history
14 #
15 # May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm
16 # by Andrew Macgregor
17 #
18 # April 17, 2002 - Initial implementation by Andrew Macgregor
19 # POD documentation - main docs before the code
20
21 =head1 NAME
22
23 Bio::ClusterIO - Handler for Cluster Formats
24
25 =head1 SYNOPSIS
26
27 #NB: This example is unigene specific
28
29 use Bio::ClusterIO;
30
31 $stream = Bio::ClusterIO->new('-file' => "Hs.data",
32 '-format' => "unigene");
33 # note: we quote -format to keep older perls from complaining.
34
35 while ( my $in = $stream->next_cluster() ) {
36 print $in->unigene_id() . "\n";
37 while ( my $sequence = $in->next_seq() ) {
38 print $sequence->accession_number() . "\n";
39 }
40 }
41 # Parsing errors are printed to STDERR.
42
43 =head1 DESCRIPTION
44
45 The ClusterIO module works with the ClusterIO format module to read
46 various cluster formats such as NCBI UniGene.
47
48
49 =head1 CONSTRUCTORS
50
51 =head2 Bio::ClusterIO-E<gt>new()
52
53 $str = Bio::ClusterIO->new(-file => 'filename',
54 -format=>$format);
55
56 The new() class method constructs a new Bio::ClusterIO object. The
57 returned object can be used to retrieve or print cluster
58 objects. new() accepts the following parameters:
59
60 =over 4
61
62 =item -file
63
64 A file path to be opened for reading.
65
66 =item -format
67
68 Specify the format of the file. Supported formats include:
69
70 unigene *.data UniGene build files.
71 dbsnp *.xml dbSNP XML files
72
73 If no format is specified and a filename is given, then the module
74 will attempt to deduce it from the filename. If this is unsuccessful,
75 the main UniGene build format is assumed.
76
77 The format name is case insensitive. 'UNIGENE', 'UniGene' and
78 'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP
79
80 =back
81
82 =head1 OBJECT METHODS
83
84 See below for more detailed summaries. The main methods are:
85
86 =head2 $cluster = $str-E<gt>next_cluster()
87
88 Fetch the next cluster from the stream.
89
90
91 =head2 TIEHANDLE(), READLINE(), PRINT()
92
93 These methods were carried over from the SeqIO module, where they
94 provide the tie interface. Feedback on them is appreciated.
95 See L<perltie> for more details.
96
97 =head1 FEEDBACK
98
99 =head2 Mailing Lists
100
101 User feedback is an integral part of the evolution of this
102 and other Bioperl modules. Send your comments and suggestions preferably
103 to one of the Bioperl mailing lists.
104 Your participation is much appreciated.
105
106 bioperl-l@bioperl.org - General discussion
107 http://bioperl.org/MailList.shtml - About the mailing lists
108
109 =head2 Reporting Bugs
110
111 Report bugs to the Bioperl bug tracking system to help us keep track
112 of the bugs and their resolution.
113 Bug reports can be submitted via email or the web:
114
115 bioperl-bugs@bioperl.org
116 http://bugzilla.bioperl.org/
117
118 =head1 AUTHOR - Andrew Macgregor
119
120 Email andrew@anatomy.otago.ac.nz
121
122 =head1 APPENDIX
123
124 The rest of the documentation details each of the object
125 methods. Internal methods are usually preceded with a _
126
127 =cut
128
129 #'
130 # Let the code begin...
131
132 package Bio::ClusterIO;
133
134 use strict;
135 use vars qw(@ISA);
136
137 use Bio::Root::Root;
138 use Bio::Root::IO;
139
140 @ISA = qw(Bio::Root::Root Bio::Root::IO);
141
142
143
144 =head2 new
145
146 Title : new
147 Usage : Bio::ClusterIO->new(-file => $filename, -format => 'format')
148 Function: Returns a new cluster stream
149 Returns : A Bio::ClusterIO::Handler initialised with the appropriate format
150 Args : -file => $filename
151 -format => format
152
153 =cut
154
155
# File-scoped lexical.
# NOTE(review): not referenced by any sub visible in this file; presumably
# vestigial -- confirm before removing.
my $entry = 0;

# Constructor / factory.
#
# Called on a format-specific subclass (Bio::ClusterIO::<format>), it builds
# the object through the parent constructor and then runs _initialize().
# Called on any other class, it acts as a factory: it works out the format,
# loads the matching Bio::ClusterIO::<format> module and delegates to that
# module's new() with the original arguments.
#
# Args    : -file => $filename, -format => $format (keys are matched
#           case-insensitively)
# Returns : the object built by the format module, or undef if the format
#           module could not be loaded
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # or do we want to call SUPER on an object if $caller is an
    # object?
    if ( $class =~ /Bio::ClusterIO::(\S+)/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    my %param = @args;
    @param{ map { lc $_ } keys %param } = values %param;    # lowercase keys

    # Explicit -format wins; otherwise guess from the file name (or the first
    # command-line argument). If neither yields a format, fall back to
    # 'unigene', the default promised by the module documentation. Without
    # this fallback the format would be empty and the load below would
    # always fail.
    my $format = $param{'-format'}
        || $class->_guess_format( $param{'-file'} || $ARGV[0] )
        || 'unigene';
    $format = "\L$format";    # normalize capitalization to lower case

    # Kept as 'return undef' (not a bare return) so list-context callers see
    # exactly what they did before.
    return undef unless ( $class->_load_format_module($format) );
    return "Bio::ClusterIO::$format"->new(@args);
}
180
181
182 # _initialize is chained for all ClusterIO classes
183
# Chained initialiser shared by all ClusterIO classes: hands the constructor
# arguments to the IO layer via _initialize_io() and returns its result.
sub _initialize {
    my ($self, @io_args) = @_;

    # initialize the IO part
    return $self->_initialize_io(@io_args);
}
189
190 =head2 next_cluster
191
192 Title : next_cluster
193 Usage : $cluster = $stream->next_cluster()
194 Function: Reads the next cluster object from the stream and returns it.
195 Returns : a L<Bio::ClusterI> compliant object
196 Args : none
197
198
199 =cut
200
# Placeholder reader for the generic class: always raises an error through
# throw(), since only a format-specific class can actually read clusters.
sub next_cluster {
    my $self = shift;
    return $self->throw("Sorry, you cannot read from a generic Bio::ClusterIO object.");
}
205
206 =head2 cluster_factory
207
208 Title : cluster_factory
209 Usage : $obj->cluster_factory($newval)
210 Function: Get/set the object factory to use for creating the cluster
211 objects.
212 Example :
213 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
214 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
215 compliant object or undef, optional)
216
217
218 =cut
219
# Get/set accessor for the 'cluster_factory' slot of the object hash.
# With an argument (even undef) the slot is overwritten first; the current
# slot value is returned either way.
sub cluster_factory {
    my ($self, @new_value) = @_;

    $self->{'cluster_factory'} = $new_value[0] if @new_value;
    return $self->{'cluster_factory'};
}
226
227 =head2 object_factory
228
229 Title : object_factory
230 Usage : $obj->object_factory($newval)
231 Function: This is an alias to cluster_factory with a more generic name.
232 Example :
233 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object
234 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI>
235 compliant object or undef, optional)
236
237
238 =cut
239
# Generic-name alias: forwards any arguments to cluster_factory() on the
# same object and returns whatever that returns.
sub object_factory {
    my $self = shift;
    return $self->cluster_factory(@_);
}
243
244 =head2 _load_format_module
245
246 Title : _load_format_module
247 Usage : *INTERNAL ClusterIO stuff*
248 Function: Loads up (like use) a module at run time on demand
249 Example :
250 Returns :
251 Args :
252
253 =cut
254
# Loads Bio::ClusterIO::<format> on demand through _load_module().
# A failure inside _load_module() is trapped: a diagnostic is written to
# STDERR and undef is returned. On success the value from _load_module()
# is returned.
sub _load_format_module {
    my ($self, $format) = @_;
    my $module = "Bio::ClusterIO::$format";

    # eval yields undef if _load_module() dies, so $ok stays undefined then.
    my $ok = eval { $self->_load_module($module) };

    if ($@) {
        print STDERR <<END;
$self: could not load $format - for more details on supported formats please see the ClusterIO docs
Exception $@
END
    }

    return $ok;
}
272
273 =head2 _guess_format
274
275 Title : _guess_format
276 Usage : $obj->_guess_format($filename)
277 Function: guess format based on file suffix
278 Example :
279 Returns : guessed format of filename (lower case)
280 Args :
281 Notes : formats that _guess_format() will guess include unigene and dbsnp
282
283 =cut
284
# Guesses the cluster format from a file name's suffix.
#
# Args    : $filename - file name to inspect (optional)
# Returns : 'unigene' for a name ending in .data, 'dbsnp' for a name ending
#           in .xml (both case-insensitive); nothing (undef in scalar
#           context) when no usable file name is given or the suffix is not
#           recognised.
sub _guess_format {
    my ($class, $filename) = @_;

    # No file name (or a false one) means there is nothing to inspect.
    return unless $filename;

    # Match against a lexical instead of assigning to the global $_, so the
    # caller's $_ -- and any list element it is aliased to inside a
    # for/map/grep -- is never overwritten.
    return 'unigene' if $filename =~ /\.(data)$/i;
    return 'dbsnp'   if $filename =~ /\.(xml)$/i;

    # Unrecognised suffix: return explicitly rather than leaking the value
    # of the last failed match.
    return;
}
291
# Destructor: calls close() on the object when it is destroyed.
sub DESTROY {
    my ($self) = @_;
    $self->close();
}
297
298 # I need some direction on these!! The module works so I haven't fiddled with them!
299
# Tie constructor: wraps the supplied value under the 'seqio' key of a new
# hash and blesses that hash into the requested class.
sub TIEHANDLE {
    my ($class, $val) = @_;
    my %state = ( 'seqio' => $val );
    return bless \%state, $class;
}
304
# Tie read hook. In non-list context it returns a single next_seq() result
# from the wrapped object; in list context it keeps calling next_seq() until
# a false value comes back and returns everything collected.
# NOTE(review): this calls next_seq() on the wrapped object, whereas this
# module's own reader method is next_cluster() -- confirm which is intended.
sub READLINE {
    my ($self) = @_;
    my $stream = $self->{'seqio'};

    if ( !wantarray ) {
        return $stream->next_seq();
    }

    my @collected;
    while ( my $item = $stream->next_seq() ) {
        push @collected, $item;
    }
    return @collected;
}
312
# Tie write hook: passes every argument after the object to write_seq() on
# the wrapped 'seqio' object and returns its result.
sub PRINT {
    my $self   = shift;
    my $writer = $self->{'seqio'};
    return $writer->write_seq(@_);
}
317
318 1;
319