Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SeqIO/ace.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: ace.pm,v 1.15 2002/10/25 16:23:16 jason Exp $ | |
| 2 # | |
| 3 # BioPerl module for Bio::SeqIO::ace | |
| 4 # | |
| 5 # Cared for by James Gilbert <jgrg@sanger.ac.uk> | |
| 6 # | |
| 7 # You may distribute this module under the same terms as perl itself | |
| 8 | |
| 9 # POD documentation - main docs before the code | |
| 10 | |
| 11 =head1 NAME | |
| 12 | |
| 13 Bio::SeqIO::ace - ace sequence input/output stream | |
| 14 | |
| 15 =head1 SYNOPSIS | |
| 16 | |
| 17 Do not use this module directly. Use it via the Bio::SeqIO class. | |
| 18 | |
| 19 =head1 DESCRIPTION | |
| 20 | |
| 21 This object can transform Bio::Seq objects to and | |
| 22 from ace file format. It only parses a DNA or | |
| 23 Peptide objects contained in the ace file, | |
| 24 producing PrimarySeq objects from them. All | |
| 25 other objects in the files will be ignored. It | |
| 26 doesn't attempt to parse any annotation attatched | |
| 27 to the containing Sequence or Protein objects, | |
| 28 which would probably be impossible, since | |
| 29 everyone's ACeDB schema can be different. | |
| 30 | |
| 31 It won't parse ace files containing Timestamps | |
| 32 correctly either. This can easily be added if | |
| 33 considered necessary. | |
| 34 | |
| 35 =head1 FEEDBACK | |
| 36 | |
| 37 =head2 Mailing Lists | |
| 38 | |
| 39 User feedback is an integral part of the evolution of this | |
| 40 and other Bioperl modules. Send your comments and suggestions preferably | |
| 41 to one of the Bioperl mailing lists. | |
| 42 Your participation is much appreciated. | |
| 43 | |
| 44 bioperl-l@bioperl.org - General discussion | |
| 45 http://www.bioperl.org/MailList.shtml - About the mailing lists | |
| 46 | |
| 47 =head2 Reporting Bugs | |
| 48 | |
| 49 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 50 the bugs and their resolution. | |
| 51 Bug reports can be submitted via email or the web: | |
| 52 | |
| 53 bioperl-bugs@bio.perl.org | |
| 54 http://bugzilla.bioperl.org/ | |
| 55 | |
| 56 =head1 AUTHORS - James Gilbert | |
| 57 | |
| 58 Email: jgrg@sanger.ac.uk | |
| 59 | |
| 60 =head1 APPENDIX | |
| 61 | |
| 62 The rest of the documentation details each of the object | |
| 63 methods. Internal methods are usually preceded with a _ | |
| 64 | |
| 65 =cut | |
| 66 | |
| 67 #' | |
| 68 # Let the code begin... | |
| 69 | |
| 70 package Bio::SeqIO::ace; | |
| 71 use strict; | |
| 72 use vars qw(@ISA); | |
| 73 | |
| 74 use Bio::SeqIO; | |
| 75 use Bio::Seq; | |
| 76 use Bio::Seq::SeqFactory; | |
| 77 | |
| 78 @ISA = qw(Bio::SeqIO); | |
| 79 | |
| 80 sub _initialize { | |
| 81 my($self,@args) = @_; | |
| 82 $self->SUPER::_initialize(@args); | |
| 83 if( ! defined $self->sequence_factory ) { | |
| 84 $self->sequence_factory(new Bio::Seq::SeqFactory(-verbose => $self->verbose(), -type => 'Bio::PrimarySeq')); | |
| 85 } | |
| 86 } | |
| 87 | |
| 88 =head2 next_seq | |
| 89 | |
| 90 Title : next_seq | |
| 91 Usage : $seq = $stream->next_seq() | |
| 92 Function: returns the next sequence in the stream | |
| 93 Returns : Bio::Seq object | |
| 94 Args : NONE | |
| 95 | |
| 96 =cut | |
| 97 | |
| 98 { | |
| 99 my %bio_mol_type = ( | |
| 100 'dna' => 'dna', | |
| 101 'peptide' => 'protein', | |
| 102 ); | |
| 103 | |
| 104 sub next_seq { | |
| 105 my( $self ) = @_; | |
| 106 local $/ = ""; # Split input on blank lines | |
| 107 | |
| 108 my $fh = $self->_filehandle; | |
| 109 my( $type, $id ); | |
| 110 while (<$fh>) { | |
| 111 if (($type, $id) = /^(DNA|Peptide)[\s:]+(.+?)\s*\n/si) { | |
| 112 s/^.+$//m; # Remove first line | |
| 113 s/\s+//g; # Remove whitespace | |
| 114 last; | |
| 115 } | |
| 116 } | |
| 117 # Return if there weren't any DNA or peptide objects | |
| 118 return unless $type; | |
| 119 | |
| 120 # Choose the molecule type | |
| 121 my $mol_type = $bio_mol_type{lc $type} | |
| 122 or $self->throw("Can't get Bio::Seq molecule type for '$type'"); | |
| 123 | |
| 124 # Remove quotes from $id | |
| 125 $id =~ s/^"|"$//g; | |
| 126 | |
| 127 # Un-escape forward slashes, double quotes, percent signs, | |
| 128 # semi-colons, tabs, and backslashes (if you're mad enough | |
| 129 # to have any of these as part of object names in your acedb | |
| 130 # database). | |
| 131 $id =~ s/\\([\/"%;\t\\])/$1/g; | |
| 132 #" | |
| 133 # Called as next_seq(), so give back a Bio::Seq | |
| 134 return $self->sequence_factory->create( | |
| 135 -seq => $_, | |
| 136 -primary_id => $id, | |
| 137 -display_id => $id, | |
| 138 -alphabet => $mol_type, | |
| 139 ); | |
| 140 } | |
| 141 } | |
| 142 | |
| 143 =head2 write_seq | |
| 144 | |
| 145 Title : write_seq | |
| 146 Usage : $stream->write_seq(@seq) | |
| 147 Function: writes the $seq object into the stream | |
| 148 Returns : 1 for success and 0 for error | |
| 149 Args : Bio::Seq object(s) | |
| 150 | |
| 151 | |
| 152 =cut | |
| 153 | |
| 154 sub write_seq { | |
| 155 my ($self, @seq) = @_; | |
| 156 | |
| 157 foreach my $seq (@seq) { | |
| 158 $self->throw("Did not provide a valid Bio::PrimarySeqI object") | |
| 159 unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI'); | |
| 160 my $mol_type = $seq->alphabet; | |
| 161 my $id = $seq->display_id; | |
| 162 | |
| 163 # Escape special charachers in id | |
| 164 $id =~ s/([\/"%;\t\\])/\\$1/g; | |
| 165 #" | |
| 166 # Print header for DNA or Protein object | |
| 167 if ($mol_type eq 'dna') { | |
| 168 $self->_print( | |
| 169 qq{\nSequence : "$id"\nDNA "$id"\n}, | |
| 170 qq{\nDNA : "$id"\n}, | |
| 171 ); | |
| 172 } | |
| 173 elsif ($mol_type eq 'protein') { | |
| 174 $self->_print( | |
| 175 qq{\nProtein : "$id"\nPeptide "$id"\n}, | |
| 176 qq{\nPeptide : "$id"\n}, | |
| 177 ); | |
| 178 } | |
| 179 else { | |
| 180 $self->throw("Don't know how to produce ACeDB output for '$mol_type'"); | |
| 181 } | |
| 182 | |
| 183 # Print the sequence | |
| 184 my $str = $seq->seq; | |
| 185 my( $formatted_seq ); | |
| 186 while ($str =~ /(.{1,60})/g) { | |
| 187 $formatted_seq .= "$1\n"; | |
| 188 } | |
| 189 $self->_print($formatted_seq, "\n"); | |
| 190 } | |
| 191 | |
| 192 $self->flush if $self->_flush_on_write && defined $self->_fh; | |
| 193 return 1; | |
| 194 } | |
| 195 | |
| 196 1; |
