Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/ClusterIO.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: ClusterIO.pm,v 1.11.2.1 2003/01/21 01:11:17 jason Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::ClusterIO.pm | |
| 4 # | |
| 5 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz> | |
| 6 # | |
| 7 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green | |
| 8 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago | |
| 9 # http://anatomy.otago.ac.nz/meg | |
| 10 # | |
| 11 # You may distribute this module under the same terms as perl itself | |
| 12 # | |
| 13 # _history | |
| 14 # | |
| 15 # May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm | |
| 16 # by Andrew Macgregor | |
| 17 # | |
| 18 # April 17, 2002 - Initial implementation by Andrew Macgregor | |
| 19 # POD documentation - main docs before the code | |
| 20 | |
| 21 =head1 NAME | |
| 22 | |
| 23 Bio::ClusterIO - Handler for Cluster Formats | |
| 24 | |
| 25 =head1 SYNOPSIS | |
| 26 | |
| 27 #NB: This example is unigene specific | |
| 28 | |
| 29 use Bio::ClusterIO; | |
| 30 | |
| 31 $stream = Bio::ClusterIO->new('-file' => "Hs.data", | |
| 32 '-format' => "unigene"); | |
| 33 # note: we quote -format to keep older perl's from complaining. | |
| 34 | |
| 35 while ( my $in = $stream->next_cluster() ) { | |
| 36 print $in->unigene_id() . "\n"; | |
| 37 while ( my $sequence = $in->next_seq() ) { | |
| 38 print $sequence->accession_number() . "\n"; | |
| 39 } | |
| 40 } | |
| 41 # Parsing errors are printed to STDERR. | |
| 42 | |
| 43 =head1 DESCRIPTION | |
| 44 | |
| 45 The ClusterIO module works with the ClusterIO format module to read | |
| 46 various cluster formats such as NCBI UniGene. | |
| 47 | |
| 48 | |
| 49 =head1 CONSTRUCTORS | |
| 50 | |
| 51 =head2 Bio::ClusterIO-E<gt>new() | |
| 52 | |
| 53 $str = Bio::ClusterIO->new(-file => 'filename', | |
| 54 -format=>$format); | |
| 55 | |
| 56 The new() class method constructs a new Bio::ClusterIO object. The | |
| 57 returned object can be used to retrieve or print cluster | |
| 58 objects. new() accepts the following parameters: | |
| 59 | |
| 60 =over 4 | |
| 61 | |
| 62 =item -file | |
| 63 | |
| 64 A file path to be opened for reading. | |
| 65 | |
| 66 =item -format | |
| 67 | |
| 68 Specify the format of the file. Supported formats include: | |
| 69 | |
| 70 unigene *.data UniGene build files. | |
| 71 dbsnp *.xml dbSNP XML files | |
| 72 | |
| 73 If no format is specified and a filename is given, then the module | |
| 74 will attempt to deduce it from the filename. If this is unsuccessful, | |
| 75 the main UniGene build format is assumed. | |
| 76 | |
| 77 The format name is case insensitive. 'UNIGENE', 'UniGene' and | |
| 78 'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP | |
| 79 | |
| 80 =back | |
| 81 | |
| 82 =head1 OBJECT METHODS | |
| 83 | |
| 84 See below for more detailed summaries. The main methods are: | |
| 85 | |
| 86 =head2 $cluster = $str-E<gt>next_cluster() | |
| 87 | |
| 88 Fetch the next cluster from the stream. | |
| 89 | |
| 90 | |
| 91 =head2 TIEHANDLE(), READLINE(), PRINT() | |
| 92 | |
| 93 These I've left in here because they were in the SeqIO | |
| 94 module. Feedback appreciated. There they provide the tie interface. | |
| 95 See L<perltie> for more details. | |
| 96 | |
| 97 =head1 FEEDBACK | |
| 98 | |
| 99 =head2 Mailing Lists | |
| 100 | |
| 101 User feedback is an integral part of the evolution of this | |
| 102 and other Bioperl modules. Send your comments and suggestions preferably | |
| 103 to one of the Bioperl mailing lists. | |
| 104 Your participation is much appreciated. | |
| 105 | |
| 106 bioperl-l@bioperl.org - General discussion | |
| 107 http://bioperl.org/MailList.shtml - About the mailing lists | |
| 108 | |
| 109 =head2 Reporting Bugs | |
| 110 | |
| 111 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 112 the bugs and their resolution. | |
| 113 Bug reports can be submitted via email or the web: | |
| 114 | |
| 115 bioperl-bugs@bioperl.org | |
| 116 http://bugzilla.bioperl.org/ | |
| 117 | |
| 118 =head1 AUTHOR - Andrew Macgregor | |
| 119 | |
| 120 Email andrew@anatomy.otago.ac.nz | |
| 121 | |
| 122 =head1 APPENDIX | |
| 123 | |
| 124 The rest of the documentation details each of the object | |
| 125 methods. Internal methods are usually preceded with a _ | |
| 126 | |
| 127 =cut | |
| 128 | |
| 129 #' | |
| 130 # Let the code begin... | |
| 131 | |
| 132 package Bio::ClusterIO; | |
| 133 | |
| 134 use strict; | |
| 135 use vars qw(@ISA); | |
| 136 | |
| 137 use Bio::Root::Root; | |
| 138 use Bio::Root::IO; | |
| 139 | |
| 140 @ISA = qw(Bio::Root::Root Bio::Root::IO); | |
| 141 | |
| 142 | |
| 143 | |
| 144 =head2 new | |
| 145 | |
| 146 Title : new | |
| 147 Usage : Bio::ClusterIO->new(-file => $filename, -format => 'format') | |
| 148 Function: Returns a new cluster stream | |
| 149 Returns : A Bio::ClusterIO::Handler initialised with the appropriate format | |
| 150 Args : -file => $filename | |
| 151 -format => format | |
| 152 | |
| 153 =cut | |
| 154 | |
| 155 | |
| 156 my $entry = 0; | |
| 157 | |
| 158 sub new { | |
| 159 my ($caller,@args) = @_; | |
| 160 my $class = ref($caller) || $caller; | |
| 161 | |
| 162 # or do we want to call SUPER on an object if $caller is an | |
| 163 # object? | |
| 164 if( $class =~ /Bio::ClusterIO::(\S+)/ ) { | |
| 165 my ($self) = $class->SUPER::new(@args); | |
| 166 $self->_initialize(@args); | |
| 167 return $self; | |
| 168 } else { | |
| 169 | |
| 170 my %param = @args; | |
| 171 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys | |
| 172 my $format = $param{'-format'} || | |
| 173 $class->_guess_format( $param{-file} || $ARGV[0] ); | |
| 174 $format = "\L$format"; # normalize capitalization to lower case | |
| 175 | |
| 176 return undef unless( $class->_load_format_module($format) ); | |
| 177 return "Bio::ClusterIO::$format"->new(@args); | |
| 178 } | |
| 179 } | |
| 180 | |
| 181 | |
| 182 # _initialize is chained for all ClusterIO classes | |
| 183 | |
| 184 sub _initialize { | |
| 185 my($self, @args) = @_; | |
| 186 # initialize the IO part | |
| 187 $self->_initialize_io(@args); | |
| 188 } | |
| 189 | |
| 190 =head2 next_cluster | |
| 191 | |
| 192 Title : next_cluster | |
| 193 Usage : $cluster = $stream->next_cluster() | |
| 194 Function: Reads the next cluster object from the stream and returns it. | |
| 195 Returns : a L<Bio::ClusterI> compliant object | |
| 196 Args : none | |
| 197 | |
| 198 | |
| 199 =cut | |
| 200 | |
| 201 sub next_cluster { | |
| 202 my ($self, $seq) = @_; | |
| 203 $self->throw("Sorry, you cannot read from a generic Bio::ClusterIO object."); | |
| 204 } | |
| 205 | |
| 206 =head2 cluster_factory | |
| 207 | |
| 208 Title : cluster_factory | |
| 209 Usage : $obj->cluster_factory($newval) | |
| 210 Function: Get/set the object factory to use for creating the cluster | |
| 211 objects. | |
| 212 Example : | |
| 213 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object | |
| 214 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI> | |
| 215 compliant object or undef, optional) | |
| 216 | |
| 217 | |
| 218 =cut | |
| 219 | |
| 220 sub cluster_factory{ | |
| 221 my $self = shift; | |
| 222 | |
| 223 return $self->{'cluster_factory'} = shift if @_; | |
| 224 return $self->{'cluster_factory'}; | |
| 225 } | |
| 226 | |
| 227 =head2 object_factory | |
| 228 | |
| 229 Title : object_factory | |
| 230 Usage : $obj->object_factory($newval) | |
| 231 Function: This is an alias to cluster_factory with a more generic name. | |
| 232 Example : | |
| 233 Returns : a L<Bio::Factory::ObjectFactoryI> compliant object | |
| 234 Args : on set, new value (a L<Bio::Factory::ObjectFactoryI> | |
| 235 compliant object or undef, optional) | |
| 236 | |
| 237 | |
| 238 =cut | |
| 239 | |
| 240 sub object_factory{ | |
| 241 return shift->cluster_factory(@_); | |
| 242 } | |
| 243 | |
| 244 =head2 _load_format_module | |
| 245 | |
| 246 Title : _load_format_module | |
| 247 Usage : *INTERNAL ClusterIO stuff* | |
| 248 Function: Loads up (like use) a module at run time on demand | |
| 249 Example : | |
| 250 Returns : | |
| 251 Args : | |
| 252 | |
| 253 =cut | |
| 254 | |
| 255 sub _load_format_module { | |
| 256 my ($self,$format) = @_; | |
| 257 my $module = "Bio::ClusterIO::" . $format; | |
| 258 my $ok; | |
| 259 | |
| 260 eval { | |
| 261 $ok = $self->_load_module($module); | |
| 262 }; | |
| 263 if ( $@ ) { | |
| 264 print STDERR <<END; | |
| 265 $self: could not load $format - for more details on supported formats please see the ClusterIO docs | |
| 266 Exception $@ | |
| 267 END | |
| 268 ; | |
| 269 } | |
| 270 return $ok; | |
| 271 } | |
| 272 | |
| 273 =head2 _guess_format | |
| 274 | |
| 275 Title : _guess_format | |
| 276 Usage : $obj->_guess_format($filename) | |
| 277 Function: guess format based on file suffix | |
| 278 Example : | |
| 279 Returns : guessed format of filename (lower case) | |
| 280 Args : | |
| 281 Notes : formats that _filehandle() will guess include unigene and dbsnp | |
| 282 | |
| 283 =cut | |
| 284 | |
| 285 sub _guess_format { | |
| 286 my $class = shift; | |
| 287 return unless $_ = shift; | |
| 288 return 'unigene' if /\.(data)$/i; | |
| 289 return 'dbsnp' if /\.(xml)$/i; | |
| 290 } | |
| 291 | |
| 292 sub DESTROY { | |
| 293 my $self = shift; | |
| 294 | |
| 295 $self->close(); | |
| 296 } | |
| 297 | |
| 298 # I need some direction on these!! The module works so I haven't fiddled with them! | |
| 299 | |
| 300 sub TIEHANDLE { | |
| 301 my ($class,$val) = @_; | |
| 302 return bless {'seqio' => $val}, $class; | |
| 303 } | |
| 304 | |
| 305 sub READLINE { | |
| 306 my $self = shift; | |
| 307 return $self->{'seqio'}->next_seq() unless wantarray; | |
| 308 my (@list, $obj); | |
| 309 push @list, $obj while $obj = $self->{'seqio'}->next_seq(); | |
| 310 return @list; | |
| 311 } | |
| 312 | |
| 313 sub PRINT { | |
| 314 my $self = shift; | |
| 315 $self->{'seqio'}->write_seq(@_); | |
| 316 } | |
| 317 | |
| 318 1; | |
| 319 |
