comparison variant_effect_predictor/Bio/AlignIO/msf.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: msf.pm,v 1.16 2002/11/26 16:34:39 jason Exp $
2 #
3 # BioPerl module for Bio::AlignIO::msf
4
5 # based on the Bio::SeqIO::msf module
6 # by Ewan Birney <birney@sanger.ac.uk>
7 # and Lincoln Stein <lstein@cshl.org>
8 #
9 # and the SimpleAlign.pm module of Ewan Birney
10 #
11 # Copyright Peter Schattner
12 #
13 # You may distribute this module under the same terms as perl itself
14 # _history
15 # September 5, 2000
16 # POD documentation - main docs before the code
17
18 =head1 NAME
19
20 Bio::AlignIO::msf - msf sequence input/output stream
21
22 =head1 SYNOPSIS
23
24 Do not use this module directly. Use it via the L<Bio::AlignIO> class.
25
26 =head1 DESCRIPTION
27
28 This object can transform L<Bio::Align::AlignI> objects to and from msf flat
29 file databases.
30
31 =head1 FEEDBACK
32
33 =head2 Reporting Bugs
34
35 Report bugs to the Bioperl bug tracking system to help us keep track
36 of the bugs and their resolution.
37 Bug reports can be submitted via email or the web:
38
39 bioperl-bugs@bio.perl.org
40 http://bugzilla.bioperl.org/
41
42 =head1 AUTHORS - Peter Schattner
43
44 Email: schattner@alum.mit.edu
45
46
47 =head1 APPENDIX
48
49 The rest of the documentation details each of the object
50 methods. Internal methods are usually preceded with a _
51
52 =cut
53
54 # Let the code begin...
55
56 package Bio::AlignIO::msf;
57 use vars qw(@ISA %valid_type);
58 use strict;
59
60 use Bio::AlignIO;
61 use Bio::SeqIO::gcg; # for GCG_checksum()
62 use Bio::SimpleAlign;
63
64 @ISA = qw(Bio::AlignIO);
65
BEGIN {
    # Lookup from a sequence alphabet name to the one-letter code that
    # write_aln prints after "Type:" in the MSF header line.
    %valid_type = (
        dna     => 'N',
        rna     => 'N',
        protein => 'P',
    );
}
69
=head2 next_aln

 Title   : next_aln
 Usage   : $aln = $stream->next_aln()
 Function: returns the next alignment in the stream. Tries to read *all*
           MSF files: every non-whitespace character found in the
           alignment area is kept as part of the sequence. For MSFs
           with weird gaps (eg ~~~) map them by using
           $aln->map_chars('~','-')
 Returns : L<Bio::Align::AlignI> object, or 0 if the header declared
           no sequence names
 Args    : NONE

=cut

sub next_aln {
    my $self = shift;

    my %seq_for;    # sequence name => aligned string collected so far
    my @names;      # sequence names in the order the header declares them

    my $aln = Bio::SimpleAlign->new( -source => 'gcg' );

    # Header section: collect the "Name:" entries up to the "//" divider.
    while ( defined( my $entry = $self->_readline ) ) {

        # Only a line that *starts* with "//" is the divider. Matching
        # "//" anywhere would end the header early on comment lines that
        # happen to contain a URL or a path.
        last if $entry =~ m{^\s*//};

        if ( $entry =~ /Name:\s+(\S+)/ ) {
            my $name = $1;

            # Keep header order, but register each name only once so a
            # repeated Name: line does not add the sequence twice.
            push @names, $name unless exists $seq_for{$name};
            $seq_for{$name} = "";
        }

        # any other header line is skipped
    }

    # Alignment section: runs to the end of the stream.
    while ( defined( my $entry = $self->_readline ) ) {

        # Lines starting with whitespace followed by a digit carry no
        # sequence name (presumably the position ruler) and are skipped.
        next if $entry =~ /^\s+\d+/;

        # Everything else of interest is "<name> <residues ...>".
        next unless $entry =~ /^\s*(\S+)\s+(.*)$/;
        my ( $name, $str ) = ( $1, $2 );

        if ( !exists $seq_for{$name} ) {
            $self->throw("$name exists as an alignment line but not in the header. Not confident of what is going on!");
        }

        $str =~ s/\s//g;    # drop the spacing between residue groups
        $seq_for{$name} .= $str;
    }

    return 0 unless @names;

    # Turn the name => string table into LocatableSeq objects.
    foreach my $name (@names) {
        my ( $seqname, $start, $end );

        if ( $name =~ m{(\S+)/(\d+)-(\d+)} ) {

            # The name carries its own coordinates: id/start-end
            ( $seqname, $start, $end ) = ( $1, $2, $3 );
        }
        else {

            # No coordinates in the name: start at 1 and end at the
            # number of letters, so gap characters are not counted.
            $seqname = $name;
            $start   = 1;
            $end     = ( $seq_for{$name} =~ tr/A-Za-z// );
        }

        my $seq = Bio::LocatableSeq->new(
            '-seq'   => $seq_for{$name},
            '-id'    => $seqname,
            '-start' => $start,
            '-end'   => $end,
        );

        $aln->add_seq($seq);
    }

    return $aln;
}
150
151
152
153
=head2 write_aln

 Title   : write_aln
 Usage   : $stream->write_aln(@aln)
 Function: writes each $aln object into the stream in MSF format.
           Sequence type of the alignment is determined by the first
           sequence.
 Returns : 1 (arguments that are not L<Bio::Align::AlignI> objects are
           skipped with a warning)
 Args    : one or more L<Bio::Align::AlignI> objects

=cut

sub write_aln {
    my ( $self, @aln ) = @_;

    foreach my $aln (@aln) {
        if ( !$aln || !$aln->isa('Bio::Align::AlignI') ) {
            $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
            next;
        }

        # All bookkeeping is per alignment. Sharing it across the loop
        # made a second alignment start at the previous offset and
        # re-print the previous alignment's sequence names.
        my %seq_for;      # display name => aligned sequence string
        my @names;        # display names in output order
        my $count = 0;    # first column of the block being written

        my $date    = localtime(time);
        my $type    = $valid_type{ $aln->get_seq_by_pos(1)->alphabet };
        my $maxname = $aln->maxdisplayname_length();
        my $length  = $aln->length();
        my $name    = $aln->id();
        $name = "Align" if !defined $name;

        # File header line
        $self->_print(
            sprintf(
                "\n%s MSF: %d Type: %s %s Check: 00 ..\n\n",
                $name, $aln->no_sequences, $type, $date
            )
        );

        # One "Name:" line per sequence, padded so the Len: columns line up
        foreach my $seq ( $aln->each_seq() ) {
            my $display = $aln->displayname( $seq->get_nse() );
            my $pad     = " " x ( $maxname - length($display) + 2 );

            $self->_print(
                sprintf(
                    " Name: %s%sLen: %d Check: %d Weight: 1.00\n",
                    $display, $pad,
                    length $seq->seq(),
                    Bio::SeqIO::gcg->GCG_checksum($seq)
                )
            );

            $seq_for{$display} = $seq->seq();
            push @names, $display;
        }

        # Divider between header and alignment section
        $self->_print("\n//\n\n\n");

        # The name column is at least 20 wide and grows for longer
        # display names, so the residue columns stay aligned.
        my $name_width  = $maxname > 20 ? $maxname : 20;
        my $name_format = "%-${name_width}s ";

        # Alignment section: blocks of up to 5 groups of 10 columns.
        # The @names guard keeps the loop from spinning forever when no
        # sequence was collected.
        while ( @names && $count < $length ) {
            my $next_count = $count;

            foreach my $display (@names) {
                $self->_print( sprintf( $name_format, $display ) );

                my $pos    = $count;
                my $groups = 0;

                # Full groups of 10 while more than 10 columns remain
                while ( ( $pos + 10 < $length ) && ( $groups < 5 ) ) {
                    $self->_print(
                        sprintf( "%s ", substr( $seq_for{$display}, $pos, 10 ) )
                    );
                    $pos += 10;
                    $groups++;
                }

                # Fewer than 5 groups written: this row reaches the end
                # of the alignment, so print whatever is left.
                if ( $groups < 5 ) {
                    $self->_print(
                        sprintf( "%s ", substr( $seq_for{$display}, $pos ) )
                    );
                    $pos += 10;
                }

                $self->_print("\n");
                $next_count = $pos;
            }

            $self->_print("\n\n");
            $count = $next_count;
        }
    }

    $self->flush if $self->_flush_on_write && defined $self->_fh;
    return 1;
}
241
242 1;