comparison variant_effect_predictor/Bio/SeqIO/ace.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: ace.pm,v 1.15 2002/10/25 16:23:16 jason Exp $
2 #
3 # BioPerl module for Bio::SeqIO::ace
4 #
5 # Cared for by James Gilbert <jgrg@sanger.ac.uk>
6 #
7 # You may distribute this module under the same terms as perl itself
8
9 # POD documentation - main docs before the code
10
11 =head1 NAME
12
13 Bio::SeqIO::ace - ace sequence input/output stream
14
15 =head1 SYNOPSIS
16
17 Do not use this module directly. Use it via the Bio::SeqIO class.
18
19 =head1 DESCRIPTION
20
21 This object can transform Bio::Seq objects to and
22 from ace file format. It only parses a DNA or
23 Peptide objects contained in the ace file,
24 producing PrimarySeq objects from them. All
25 other objects in the files will be ignored. It
26 doesn't attempt to parse any annotation attatched
27 to the containing Sequence or Protein objects,
28 which would probably be impossible, since
29 everyone's ACeDB schema can be different.
30
31 It won't parse ace files containing Timestamps
32 correctly either. This can easily be added if
33 considered necessary.
34
35 =head1 FEEDBACK
36
37 =head2 Mailing Lists
38
39 User feedback is an integral part of the evolution of this
40 and other Bioperl modules. Send your comments and suggestions preferably
41 to one of the Bioperl mailing lists.
42 Your participation is much appreciated.
43
44 bioperl-l@bioperl.org - General discussion
45 http://www.bioperl.org/MailList.shtml - About the mailing lists
46
47 =head2 Reporting Bugs
48
49 Report bugs to the Bioperl bug tracking system to help us keep track
50 the bugs and their resolution.
51 Bug reports can be submitted via email or the web:
52
53 bioperl-bugs@bio.perl.org
54 http://bugzilla.bioperl.org/
55
56 =head1 AUTHORS - James Gilbert
57
58 Email: jgrg@sanger.ac.uk
59
60 =head1 APPENDIX
61
62 The rest of the documentation details each of the object
63 methods. Internal methods are usually preceded with a _
64
65 =cut
66
67 #'
68 # Let the code begin...
69
70 package Bio::SeqIO::ace;
71 use strict;
72 use vars qw(@ISA);
73
74 use Bio::SeqIO;
75 use Bio::Seq;
76 use Bio::Seq::SeqFactory;
77
78 @ISA = qw(Bio::SeqIO);
79
80 sub _initialize {
81 my($self,@args) = @_;
82 $self->SUPER::_initialize(@args);
83 if( ! defined $self->sequence_factory ) {
84 $self->sequence_factory(new Bio::Seq::SeqFactory(-verbose => $self->verbose(), -type => 'Bio::PrimarySeq'));
85 }
86 }
87
88 =head2 next_seq
89
90 Title : next_seq
91 Usage : $seq = $stream->next_seq()
92 Function: returns the next sequence in the stream
93 Returns : Bio::Seq object
94 Args : NONE
95
96 =cut
97
98 {
99 my %bio_mol_type = (
100 'dna' => 'dna',
101 'peptide' => 'protein',
102 );
103
104 sub next_seq {
105 my( $self ) = @_;
106 local $/ = ""; # Split input on blank lines
107
108 my $fh = $self->_filehandle;
109 my( $type, $id );
110 while (<$fh>) {
111 if (($type, $id) = /^(DNA|Peptide)[\s:]+(.+?)\s*\n/si) {
112 s/^.+$//m; # Remove first line
113 s/\s+//g; # Remove whitespace
114 last;
115 }
116 }
117 # Return if there weren't any DNA or peptide objects
118 return unless $type;
119
120 # Choose the molecule type
121 my $mol_type = $bio_mol_type{lc $type}
122 or $self->throw("Can't get Bio::Seq molecule type for '$type'");
123
124 # Remove quotes from $id
125 $id =~ s/^"|"$//g;
126
127 # Un-escape forward slashes, double quotes, percent signs,
128 # semi-colons, tabs, and backslashes (if you're mad enough
129 # to have any of these as part of object names in your acedb
130 # database).
131 $id =~ s/\\([\/"%;\t\\])/$1/g;
132 #"
133 # Called as next_seq(), so give back a Bio::Seq
134 return $self->sequence_factory->create(
135 -seq => $_,
136 -primary_id => $id,
137 -display_id => $id,
138 -alphabet => $mol_type,
139 );
140 }
141 }
142
143 =head2 write_seq
144
145 Title : write_seq
146 Usage : $stream->write_seq(@seq)
147 Function: writes the $seq object into the stream
148 Returns : 1 for success and 0 for error
149 Args : Bio::Seq object(s)
150
151
152 =cut
153
154 sub write_seq {
155 my ($self, @seq) = @_;
156
157 foreach my $seq (@seq) {
158 $self->throw("Did not provide a valid Bio::PrimarySeqI object")
159 unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');
160 my $mol_type = $seq->alphabet;
161 my $id = $seq->display_id;
162
163 # Escape special charachers in id
164 $id =~ s/([\/"%;\t\\])/\\$1/g;
165 #"
166 # Print header for DNA or Protein object
167 if ($mol_type eq 'dna') {
168 $self->_print(
169 qq{\nSequence : "$id"\nDNA "$id"\n},
170 qq{\nDNA : "$id"\n},
171 );
172 }
173 elsif ($mol_type eq 'protein') {
174 $self->_print(
175 qq{\nProtein : "$id"\nPeptide "$id"\n},
176 qq{\nPeptide : "$id"\n},
177 );
178 }
179 else {
180 $self->throw("Don't know how to produce ACeDB output for '$mol_type'");
181 }
182
183 # Print the sequence
184 my $str = $seq->seq;
185 my( $formatted_seq );
186 while ($str =~ /(.{1,60})/g) {
187 $formatted_seq .= "$1\n";
188 }
189 $self->_print($formatted_seq, "\n");
190 }
191
192 $self->flush if $self->_flush_on_write && defined $self->_fh;
193 return 1;
194 }
195
196 1;