Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/ClusterIO.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: ClusterIO.pm,v 1.11.2.1 2003/01/21 01:11:17 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::ClusterIO.pm | |
4 # | |
5 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz> | |
6 # | |
7 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green | |
8 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago | |
9 # http://anatomy.otago.ac.nz/meg | |
10 # | |
11 # You may distribute this module under the same terms as perl itself | |
12 # | |
13 # _history | |
14 # | |
15 # May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm | |
16 # by Andrew Macgregor | |
17 # | |
18 # April 17, 2002 - Initial implementation by Andrew Macgregor | |
19 # POD documentation - main docs before the code | |
20 | |
21 =head1 NAME | |
22 | |
23 Bio::ClusterIO - Handler for Cluster Formats | |
24 | |
25 =head1 SYNOPSIS | |
26 | |
27 #NB: This example is unigene specific | |
28 | |
29 use Bio::ClusterIO; | |
30 | |
31 $stream = Bio::ClusterIO->new('-file' => "Hs.data", | |
32 '-format' => "unigene"); | |
33 # note: we quote -format to keep older perls from complaining. | |
34 | |
35 while ( my $in = $stream->next_cluster() ) { | |
36 print $in->unigene_id() . "\n"; | |
37 while ( my $sequence = $in->next_seq() ) { | |
38 print $sequence->accession_number() . "\n"; | |
39 } | |
40 } | |
41 # Parsing errors are printed to STDERR. | |
42 | |
43 =head1 DESCRIPTION | |
44 | |
45 The ClusterIO module works with the ClusterIO format module to read | |
46 various cluster formats such as NCBI UniGene. | |
47 | |
48 | |
49 =head1 CONSTRUCTORS | |
50 | |
51 =head2 Bio::ClusterIO-E<gt>new() | |
52 | |
53 $str = Bio::ClusterIO->new(-file => 'filename', | |
54 -format=>$format); | |
55 | |
56 The new() class method constructs a new Bio::ClusterIO object. The | |
57 returned object can be used to retrieve or print cluster | |
58 objects. new() accepts the following parameters: | |
59 | |
60 =over 4 | |
61 | |
62 =item -file | |
63 | |
64 A file path to be opened for reading. | |
65 | |
66 =item -format | |
67 | |
68 Specify the format of the file. Supported formats include: | |
69 | |
70 unigene *.data UniGene build files. | |
71 dbsnp *.xml dbSNP XML files | |
72 | |
73 If no format is specified and a filename is given, then the module | |
74 will attempt to deduce it from the filename. If this is unsuccessful, | |
75 the main UniGene build format is assumed. | |
76 | |
77 The format name is case insensitive. 'UNIGENE', 'UniGene' and | |
78 'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP | |
79 | |
80 =back | |
81 | |
82 =head1 OBJECT METHODS | |
83 | |
84 See below for more detailed summaries. The main methods are: | |
85 | |
86 =head2 $cluster = $str-E<gt>next_cluster() | |
87 | |
88 Fetch the next cluster from the stream. | |
89 | |
90 | |
91 =head2 TIEHANDLE(), READLINE(), PRINT() | |
92 | |
93 These I've left in here because they were in the SeqIO | |
94 module. Feedback appreciated. There they provide the tie interface. | |
95 See L<perltie> for more details. | |
96 | |
97 =head1 FEEDBACK | |
98 | |
99 =head2 Mailing Lists | |
100 | |
101 User feedback is an integral part of the evolution of this | |
102 and other Bioperl modules. Send your comments and suggestions preferably | |
103 to one of the Bioperl mailing lists. | |
104 Your participation is much appreciated. | |
105 | |
106 bioperl-l@bioperl.org - General discussion | |
107 http://bioperl.org/MailList.shtml - About the mailing lists | |
108 | |
109 =head2 Reporting Bugs | |
110 | |
111 Report bugs to the Bioperl bug tracking system to help us keep track of | |
112 the bugs and their resolution. | |
113 Bug reports can be submitted via email or the web: | |
114 | |
115 bioperl-bugs@bioperl.org | |
116 http://bugzilla.bioperl.org/ | |
117 | |
118 =head1 AUTHOR - Andrew Macgregor | |
119 | |
120 Email andrew@anatomy.otago.ac.nz | |
121 | |
122 =head1 APPENDIX | |
123 | |
124 The rest of the documentation details each of the object | |
125 methods. Internal methods are usually preceded with a _ | |
126 | |
127 =cut | |
128 | |
129 #' | |
130 # Let the code begin... | |
131 | |
132 package Bio::ClusterIO; | |
133 | |
134 use strict; | |
135 use vars qw(@ISA); | |
136 | |
137 use Bio::Root::Root; | |
138 use Bio::Root::IO; | |
139 | |
140 @ISA = qw(Bio::Root::Root Bio::Root::IO); | |
141 | |
142 | |
143 | |
144 =head2 new | |
145 | |
146 Title : new | |
147 Usage : Bio::ClusterIO->new(-file => $filename, -format => 'format') | |
148 Function: Returns a new cluster stream | |
149 Returns : A Bio::ClusterIO::Handler initialised with the appropriate format | |
150 Args : -file => $filename | |
151 -format => format | |
152 | |
153 =cut | |
154 | |
155 | |
# File-scoped scalar; nothing in the visible part of this file reads it.
# Retained unchanged.
my $entry = 0;

# new - construct a cluster stream.
#
# When invoked on a format subclass (a class name matching
# Bio::ClusterIO::<something>), the object is built by the parent
# constructor and then passed through _initialize(@args).
#
# When invoked on any other class (normally Bio::ClusterIO itself) it acts
# as a factory: the format is taken from -format, otherwise guessed from
# -file (or $ARGV[0]) via _guess_format(), otherwise defaults to 'unigene'
# as promised in the CONSTRUCTORS documentation. The matching
# Bio::ClusterIO::<format> module is loaded on demand and its new() is
# called with the original arguments.
#
# Args    : -file   => $filename
#           -format => $format   (parameter names and the format name are
#                                 case-insensitive)
# Returns : whatever Bio::ClusterIO::<format>->new(@args) returns, or undef
#           when _load_format_module() reports failure.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;

    # or do we want to call SUPER on an object if $caller is an object?
    if ( $class =~ /Bio::ClusterIO::(\S+)/ ) {
        my ($self) = $class->SUPER::new(@args);
        $self->_initialize(@args);
        return $self;
    }

    my %param = @args;
    @param{ map { lc $_ } keys %param } = values %param;    # lowercase keys

    # Fall back to UniGene when neither -format nor the filename suffix
    # identifies a format; previously an empty format name was handed to
    # the loader, which could only fail.
    my $format = $param{'-format'}
        || $class->_guess_format( $param{'-file'} || $ARGV[0] )
        || 'unigene';
    $format = lc $format;    # normalize capitalization to lower case

    # 'return undef' (not a bare return) is kept deliberately so the value
    # seen by existing callers in list context does not change.
    return undef unless $class->_load_format_module($format);
    return "Bio::ClusterIO::$format"->new(@args);
}
180 | |
181 | |
182 # _initialize is chained for all ClusterIO classes | |
183 | |
# _initialize - set up the IO layer of a freshly constructed stream.
# Forwards every constructor argument to _initialize_io() and returns
# whatever that call returns.
sub _initialize {
    my $self    = shift;
    my @io_args = @_;

    return $self->_initialize_io(@io_args);
}
189 | |
190 =head2 next_cluster | |
191 | |
192 Title : next_cluster | |
193 Usage : $cluster = $stream->next_cluster() | |
194 Function: Reads the next cluster object from the stream and returns it. | |
195 Returns : a L<Bio::ClusterI> compliant object | |
196 Args : none | |
197 | |
198 | |
199 =cut | |
200 | |
# next_cluster - placeholder on the generic class. It always reports an
# error through throw(); the definition in this file reads nothing.
sub next_cluster {
    my $self = shift;

    return $self->throw(
        "Sorry, you cannot read from a generic Bio::ClusterIO object."
    );
}
205 | |
206 =head2 cluster_factory | |
207 | |
208 Title : cluster_factory | |
209 Usage : $obj->cluster_factory($newval) | |
210 Function: Get/set the object factory to use for creating the cluster | |
211 objects. | |
212 Example : | |
213 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object | |
214 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI> | |
215 compliant object or undef, optional) | |
216 | |
217 | |
218 =cut | |
219 | |
# cluster_factory - combined getter/setter for the 'cluster_factory' slot.
# With an argument (undef included) the slot is overwritten first; the
# current slot value is returned in both cases.
sub cluster_factory {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'cluster_factory'} = $new_value[0];
    }
    return $self->{'cluster_factory'};
}
226 | |
227 =head2 object_factory | |
228 | |
229 Title : object_factory | |
230 Usage : $obj->object_factory($newval) | |
231 Function: This is an alias to cluster_factory with a more generic name. | |
232 Example : | |
233 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object | |
234 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI> | |
235 compliant object or undef, optional) | |
236 | |
237 | |
238 =cut | |
239 | |
# object_factory - generic-name alias: delegates to cluster_factory() with
# the same arguments and returns its result.
sub object_factory {
    my ($self, @args) = @_;

    return $self->cluster_factory(@args);
}
243 | |
244 =head2 _load_format_module | |
245 | |
246 Title : _load_format_module | |
247 Usage : *INTERNAL ClusterIO stuff* | |
248 Function: Loads up (like use) a module at run time on demand | |
249 Example : | |
250 Returns : | |
251 Args : | |
252 | |
253 =cut | |
254 | |
# _load_format_module - load Bio::ClusterIO::<format> at run time.
#
# Args    : $format - format name appended to "Bio::ClusterIO::"
# Returns : the value returned by _load_module(), or undef if loading
#           raised an exception (a diagnostic is printed to STDERR then).
sub _load_format_module {
    my ($self, $format) = @_;
    my $module = "Bio::ClusterIO::$format";

    my $loaded;
    eval { $loaded = $self->_load_module($module); };

    if ($@) {
        # One concatenated string so the emitted bytes match a two-line
        # heredoc exactly, regardless of $, settings.
        print STDERR "$self: could not load $format - for more details on supported formats please see the ClusterIO docs\n"
            . "Exception $@\n";
    }

    return $loaded;
}
272 | |
273 =head2 _guess_format | |
274 | |
275 Title : _guess_format | |
276 Usage : $obj->_guess_format($filename) | |
277 Function: guess format based on file suffix | |
278 Example : | |
279 Returns : guessed format of filename (lower case) | |
280 Args : | |
281 Notes : formats that _guess_format() will guess include unigene and dbsnp | |
282 | |
283 =cut | |
284 | |
# _guess_format - guess the cluster format from a filename suffix.
#
# Args    : $filename - file name to inspect (may be undef or empty)
# Returns : 'unigene' for *.data, 'dbsnp' for *.xml (case-insensitive);
#           nothing (undef in scalar context) when no filename is given or
#           the suffix is not recognised.
#
# The filename is held in a lexical rather than assigned to the global $_,
# so the caller's $_ is left untouched and the sub is safe to call from
# inside for/map loops whose $_ is aliased to a read-only value.
sub _guess_format {
    my ($class, $filename) = @_;

    return unless $filename;
    return 'unigene' if $filename =~ /\.data$/i;
    return 'dbsnp'   if $filename =~ /\.xml$/i;
    return;
}
291 | |
# DESTROY - destructor; calls close() on the object being destroyed.
sub DESTROY {
    my ($stream) = @_;

    return $stream->close();
}
297 | |
298 # I need some direction on these!! The module works so I haven't fiddled with them! | |
299 | |
# TIEHANDLE - tie constructor. Wraps the supplied value in a new hash
# under the 'seqio' key and blesses that hash into $class.
sub TIEHANDLE {
    my $class   = shift;
    my $wrapped = shift;

    my %state = ( 'seqio' => $wrapped );
    return bless \%state, $class;
}
304 | |
# READLINE - tie read hook.
# Outside list context: returns one next_seq() result from the wrapped
# object. In list context: keeps calling next_seq() until it returns a
# false value and returns everything collected.
#
# NOTE(review): this module itself defines next_cluster(), not next_seq();
# confirm the object stored under 'seqio' actually provides next_seq().
sub READLINE {
    my ($self) = @_;
    my $stream = $self->{'seqio'};

    return $stream->next_seq() unless wantarray;

    my @records;
    while ( my $record = $stream->next_seq() ) {
        push @records, $record;
    }
    return @records;
}
312 | |
# PRINT - tie write hook. Passes every item to write_seq() on the wrapped
# object and returns that call's result.
sub PRINT {
    my ($self, @items) = @_;

    return $self->{'seqio'}->write_seq(@items);
}
317 | |
318 1; | |
319 |