Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SeqIO/ace.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # $Id: ace.pm,v 1.15 2002/10/25 16:23:16 jason Exp $ | |
2 # | |
3 # BioPerl module for Bio::SeqIO::ace | |
4 # | |
5 # Cared for by James Gilbert <jgrg@sanger.ac.uk> | |
6 # | |
7 # You may distribute this module under the same terms as perl itself | |
8 | |
9 # POD documentation - main docs before the code | |
10 | |
11 =head1 NAME | |
12 | |
13 Bio::SeqIO::ace - ace sequence input/output stream | |
14 | |
15 =head1 SYNOPSIS | |
16 | |
17 Do not use this module directly. Use it via the Bio::SeqIO class. | |
18 | |
19 =head1 DESCRIPTION | |
20 | |
21 This object can transform Bio::Seq objects to and | |
22 from ace file format. It only parses a DNA or | |
23 Peptide objects contained in the ace file, | |
24 producing PrimarySeq objects from them. All | |
25 other objects in the files will be ignored. It | |
26 doesn't attempt to parse any annotation attatched | |
27 to the containing Sequence or Protein objects, | |
28 which would probably be impossible, since | |
29 everyone's ACeDB schema can be different. | |
30 | |
31 It won't parse ace files containing Timestamps | |
32 correctly either. This can easily be added if | |
33 considered necessary. | |
34 | |
35 =head1 FEEDBACK | |
36 | |
37 =head2 Mailing Lists | |
38 | |
39 User feedback is an integral part of the evolution of this | |
40 and other Bioperl modules. Send your comments and suggestions preferably | |
41 to one of the Bioperl mailing lists. | |
42 Your participation is much appreciated. | |
43 | |
44 bioperl-l@bioperl.org - General discussion | |
45 http://www.bioperl.org/MailList.shtml - About the mailing lists | |
46 | |
47 =head2 Reporting Bugs | |
48 | |
49 Report bugs to the Bioperl bug tracking system to help us keep track | |
50 the bugs and their resolution. | |
51 Bug reports can be submitted via email or the web: | |
52 | |
53 bioperl-bugs@bio.perl.org | |
54 http://bugzilla.bioperl.org/ | |
55 | |
56 =head1 AUTHORS - James Gilbert | |
57 | |
58 Email: jgrg@sanger.ac.uk | |
59 | |
60 =head1 APPENDIX | |
61 | |
62 The rest of the documentation details each of the object | |
63 methods. Internal methods are usually preceded with a _ | |
64 | |
65 =cut | |
66 | |
67 #' | |
68 # Let the code begin... | |
69 | |
70 package Bio::SeqIO::ace; | |
71 use strict; | |
72 use vars qw(@ISA); | |
73 | |
74 use Bio::SeqIO; | |
75 use Bio::Seq; | |
76 use Bio::Seq::SeqFactory; | |
77 | |
78 @ISA = qw(Bio::SeqIO); | |
79 | |
80 sub _initialize { | |
81 my($self,@args) = @_; | |
82 $self->SUPER::_initialize(@args); | |
83 if( ! defined $self->sequence_factory ) { | |
84 $self->sequence_factory(new Bio::Seq::SeqFactory(-verbose => $self->verbose(), -type => 'Bio::PrimarySeq')); | |
85 } | |
86 } | |
87 | |
88 =head2 next_seq | |
89 | |
90 Title : next_seq | |
91 Usage : $seq = $stream->next_seq() | |
92 Function: returns the next sequence in the stream | |
93 Returns : Bio::Seq object | |
94 Args : NONE | |
95 | |
96 =cut | |
97 | |
98 { | |
99 my %bio_mol_type = ( | |
100 'dna' => 'dna', | |
101 'peptide' => 'protein', | |
102 ); | |
103 | |
104 sub next_seq { | |
105 my( $self ) = @_; | |
106 local $/ = ""; # Split input on blank lines | |
107 | |
108 my $fh = $self->_filehandle; | |
109 my( $type, $id ); | |
110 while (<$fh>) { | |
111 if (($type, $id) = /^(DNA|Peptide)[\s:]+(.+?)\s*\n/si) { | |
112 s/^.+$//m; # Remove first line | |
113 s/\s+//g; # Remove whitespace | |
114 last; | |
115 } | |
116 } | |
117 # Return if there weren't any DNA or peptide objects | |
118 return unless $type; | |
119 | |
120 # Choose the molecule type | |
121 my $mol_type = $bio_mol_type{lc $type} | |
122 or $self->throw("Can't get Bio::Seq molecule type for '$type'"); | |
123 | |
124 # Remove quotes from $id | |
125 $id =~ s/^"|"$//g; | |
126 | |
127 # Un-escape forward slashes, double quotes, percent signs, | |
128 # semi-colons, tabs, and backslashes (if you're mad enough | |
129 # to have any of these as part of object names in your acedb | |
130 # database). | |
131 $id =~ s/\\([\/"%;\t\\])/$1/g; | |
132 #" | |
133 # Called as next_seq(), so give back a Bio::Seq | |
134 return $self->sequence_factory->create( | |
135 -seq => $_, | |
136 -primary_id => $id, | |
137 -display_id => $id, | |
138 -alphabet => $mol_type, | |
139 ); | |
140 } | |
141 } | |
142 | |
143 =head2 write_seq | |
144 | |
145 Title : write_seq | |
146 Usage : $stream->write_seq(@seq) | |
147 Function: writes the $seq object into the stream | |
148 Returns : 1 for success and 0 for error | |
149 Args : Bio::Seq object(s) | |
150 | |
151 | |
152 =cut | |
153 | |
154 sub write_seq { | |
155 my ($self, @seq) = @_; | |
156 | |
157 foreach my $seq (@seq) { | |
158 $self->throw("Did not provide a valid Bio::PrimarySeqI object") | |
159 unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI'); | |
160 my $mol_type = $seq->alphabet; | |
161 my $id = $seq->display_id; | |
162 | |
163 # Escape special charachers in id | |
164 $id =~ s/([\/"%;\t\\])/\\$1/g; | |
165 #" | |
166 # Print header for DNA or Protein object | |
167 if ($mol_type eq 'dna') { | |
168 $self->_print( | |
169 qq{\nSequence : "$id"\nDNA "$id"\n}, | |
170 qq{\nDNA : "$id"\n}, | |
171 ); | |
172 } | |
173 elsif ($mol_type eq 'protein') { | |
174 $self->_print( | |
175 qq{\nProtein : "$id"\nPeptide "$id"\n}, | |
176 qq{\nPeptide : "$id"\n}, | |
177 ); | |
178 } | |
179 else { | |
180 $self->throw("Don't know how to produce ACeDB output for '$mol_type'"); | |
181 } | |
182 | |
183 # Print the sequence | |
184 my $str = $seq->seq; | |
185 my( $formatted_seq ); | |
186 while ($str =~ /(.{1,60})/g) { | |
187 $formatted_seq .= "$1\n"; | |
188 } | |
189 $self->_print($formatted_seq, "\n"); | |
190 } | |
191 | |
192 $self->flush if $self->_flush_on_write && defined $self->_fh; | |
193 return 1; | |
194 } | |
195 | |
196 1; |