comparison variant_effect_predictor/Bio/AlignIO/stockholm.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: stockholm.pm,v 1.10.2.1 2003/03/14 09:14:59 heikki Exp $
2 #
3 # BioPerl module for Bio::AlignIO::stockholm
4
5 # based on the Bio::SeqIO::stockholm module
6 # by Ewan Birney <birney@sanger.ac.uk>
7 # and Lincoln Stein <lstein@cshl.org>
8 #
9 # and the SimpleAlign.pm module of Ewan Birney
10 #
11 # Copyright Peter Schattner
12 #
13 # You may distribute this module under the same terms as perl itself
14 # _history
15 # September 5, 2000
16 # POD documentation - main docs before the code
17
18 =head1 NAME
19
20 Bio::AlignIO::stockholm - stockholm sequence input/output stream
21
22 =head1 SYNOPSIS
23
24 Do not use this module directly. Use it via the L<Bio::AlignIO> class.
25
26 =head1 DESCRIPTION
27
28 This object can transform L<Bio::Align::AlignI> objects to and from
29 stockholm flat file databases.
30
31 =head1 FEEDBACK
32
33 =head2 Reporting Bugs
34
35 Report bugs to the Bioperl bug tracking system to help us keep track
36 the bugs and their resolution. Bug reports can be submitted via email
37 or the web:
38
39 bioperl-bugs@bio.perl.org
40 http://bugzilla.bioperl.org/
41
42 =head1 AUTHORS - Peter Schattner
43
44 Email: schattner@alum.mit.edu
45
46 =head1 CONTRIBUTORS
47
48 Andreas Kahari, ak@ebi.ac.uk
49
50 =head1 APPENDIX
51
52 The rest of the documentation details each of the object
53 methods. Internal methods are usually preceded with a _
54
55 =cut
56
57 # Let the code begin...
58
59 package Bio::AlignIO::stockholm;
60 use vars qw(@ISA);
61 use strict;
62
63 use Bio::AlignIO;
64
65 @ISA = qw(Bio::AlignIO);
66
67 =head2 next_aln
68
69 Title : next_aln
70 Usage : $aln = $stream->next_aln()
71 Function: returns the next alignment in the stream.
72 Returns : L<Bio::Align::AlignI> object
73 Args : NONE
74
75 =cut
76
77 sub next_aln {
78 my $self = shift;
79 my $entry;
80
81 my ($start,$end,%align,$name,$seqname,$seq,$count,
82 %hash,%c2name, %accession, $no);
83
84 # in stockholm format, every non-blank line that does not start
85 # with '#=' is an alignment segment; the '#=' lines are mark up lines.
86 # Of particular interest are the '#=GF <name/st-ed> AC <accession>'
87 # lines, which give accession numbers for each segment
88
89 my $aln = Bio::SimpleAlign->new(-source => 'stockholm');
90
91 while( defined($entry = $self->_readline) ) {
92 $entry !~ /\w+/ && next;
93
94 if ($entry =~ /^#\s*STOCKHOLM\s+/) {
95 last;
96 }
97 else {
98 $self->throw("Not Stockholm format: Expecting \"# STOCKHOLM 1.0\"; Found \"$_\"");
99 }
100 }
101 #
102 # Next section is same as for selex format
103 #
104 while( defined($entry = $self->_readline) ) {
105 # Double slash (//) signals end of file. The flat Pfam-A data from
106 # ftp://ftp.sanger.ac.uk/pub/databases/Pfam/Pfam-A.full.gz consists
107 # of several concatenated Stockholm-formatted files. The following
108 # line makes it possible to parse it without this module trying to
109 # read the whole file into memory. Andreas Kähäri 10/3/2003.
110 last if $entry =~ /^\/\//;
111
112 # Extra bonus: Get the name of the alignment.
113 # Andreas Kähäri 10/3/2003.
114 if ($entry =~ /^#=GF\s+AC\s+(\S+)/) {
115 $aln->id($1);
116 next;
117 }
118
119 $entry =~ /^#=GS\s+(\S+)\s+AC\s+(\S+)/ && do {
120 $accession{ $1 } = $2;
121 next;
122 };
123 $entry =~ /^([A-Za-z.-]+)$/ && ( $align{$name} .= $1 ) && next;
124 $entry !~ /^([^#]\S+)\s+([A-Za-z.-]+)\s*/ && next;
125
126
127 $name = $1;
128 $seq = $2;
129
130 if( ! defined $align{$name} ) {
131 $count++;
132 $c2name{$count} = $name;
133 }
134 $align{$name} .= $seq;
135 }
136
137 # ok... now we can make the sequences
138
139 foreach $no ( sort { $a <=> $b } keys %c2name ) {
140 $name = $c2name{$no};
141
142 if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
143 $seqname = $1;
144 $start = $2;
145 $end = $3;
146 } else {
147 $seqname=$name;
148 $start = 1;
149 $end = length($align{$name});
150 }
151 $seq = new Bio::LocatableSeq('-seq'=>$align{$name},
152 '-id'=>$seqname,
153 '-start'=>$start,
154 '-end'=>$end,
155 '-type'=>'aligned',
156 '-accession_number' => $accession{$name},
157
158 );
159
160 $aln->add_seq($seq);
161
162 }
163
164 # If $end <= 0, we have either reached the end of
165 # file in <fh> or we have encountered some other error
166 #
167 if ($end <= 0) { undef $aln;}
168
169 return $aln;
170 }
171
172
173 =head2 write_aln
174
175 Title : write_aln
176 Usage : $stream->write_aln(@aln)
177 Function: writes the $aln object into the stream in stockholm format ###Not yet implemented!###
178 Returns : 1 for success and 0 for error
179 Args : L<Bio::Align::AlignI> object
180
181
182 =cut
183
184 sub write_aln {
185 my ($self,@aln) = @_;
186
187 $self->throw("Sorry: stockholm-format output, not yet implemented! /n");
188 }
189
190 1;