comparison variant_effect_predictor/Bio/AlignIO/selex.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:2bc9b66ada89
1 # $Id: selex.pm,v 1.10 2002/10/22 07:38:26 lapp Exp $
2 #
3 # BioPerl module for Bio::AlignIO::selex
4
5 # based on the Bio::SeqIO::selex module
6 # by Ewan Birney <birney@sanger.ac.uk>
7 # and Lincoln Stein <lstein@cshl.org>
8 #
9 # and the SimpleAlign.pm module of Ewan Birney
10 #
11 # Copyright Peter Schattner
12 #
13 # You may distribute this module under the same terms as perl itself
14 # _history
15 # September 5, 2000
16 # POD documentation - main docs before the code
17
18 =head1 NAME
19
20 Bio::AlignIO::selex - selex sequence input/output stream
21
22 =head1 SYNOPSIS
23
24 Do not use this module directly. Use it via the L<Bio::AlignIO> class.
25
26 =head1 DESCRIPTION
27
28 This object can transform L<Bio::Align::AlignI> objects to and from selex flat
29 file databases.
30
31 =head1 FEEDBACK
32
33 =head2 Reporting Bugs
34
35 Report bugs to the Bioperl bug tracking system to help us keep track of
36 the bugs and their resolution.
37 Bug reports can be submitted via email or the web:
38
39 bioperl-bugs@bio.perl.org
40 http://bugzilla.bioperl.org/
41
42 =head1 AUTHORS - Peter Schattner
43
44 Email: schattner@alum.mit.edu
45
46
47 =head1 APPENDIX
48
49 The rest of the documentation details each of the object
50 methods. Internal methods are usually preceded with a _
51
52 =cut
53
54 # Let the code begin...
55
56 package Bio::AlignIO::selex;
57 use vars qw(@ISA);
58 use strict;
59 use Bio::AlignIO;
60
61 @ISA = qw(Bio::AlignIO);
62
=head2 next_aln

 Title   : next_aln
 Usage   : $aln = $stream->next_aln()
 Function: Returns the next alignment in the stream.  Reads until the
           stream is exhausted.  Every line of the form
           "<name> <sequence>" that does not start with '#' is taken as
           an alignment segment; segments that share a name are
           concatenated in input order.  Only the leading run of
           letters, '.' and '-' in the sequence column is read.
           '#=GS <name> AC <accession>' mark-up lines supply the
           accession number of the sequence called <name>.
 Returns : L<Bio::Align::AlignI> object, or undef if no sequence was
           read or the last sequence read has an end coordinate <= 0
 Args    : NONE

=cut

sub next_aln {
    my $self = shift;

    my %align;        # segment name => concatenated aligned sequence
    my %accession;    # segment name => accession taken from '#=GS' lines
    my @names;        # segment names, in order of first appearance
    my $aln = Bio::SimpleAlign->new(-source => 'selex');

    # In selex format, every non-blank line that does not start with '#'
    # is an alignment segment; the '#=' lines are mark-up lines.
    # Of particular interest are the '#=GS <name/st-ed> AC <accession>'
    # lines, which give accession numbers for each segment.

    # Test for definedness rather than truth, so that only a real end of
    # stream (undef) stops the loop and never a defined-but-false line.
    while ( defined( my $entry = $self->_readline ) ) {
        if ( $entry =~ /^\#=GS\s+(\S+)\s+AC\s+(\S+)/ ) {
            $accession{$1} = $2;
            next;
        }

        # NOTE(review): characters other than letters, '.' and '-' (for
        # example '~' gaps) end the captured sequence; the rest of the
        # line is ignored.
        next unless $entry =~ /^([^\#]\S+)\s+([A-Za-z\.\-]+)\s*/;
        my ( $name, $seq ) = ( $1, $2 );

        push @names, $name unless defined $align{$name};
        $align{$name} .= $seq;
    }

    # ok... now we can make the sequences

    # End coordinate of the most recently added sequence.  Starts at 0 so
    # that an empty stream is detected below without comparing undef.
    my $last_end = 0;

    foreach my $name (@names) {
        my ( $seqname, $start, $end );

        if ( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
            # Name carries explicit coordinates: <id>/<start>-<end>
            ( $seqname, $start, $end ) = ( $1, $2, $3 );
        }
        else {
            # No coordinates given: span the whole aligned string
            ( $seqname, $start, $end ) = ( $name, 1, length( $align{$name} ) );
        }

        my $seq = Bio::LocatableSeq->new(
            '-seq'              => $align{$name},
            '-id'               => $seqname,
            '-start'            => $start,
            '-end'              => $end,
            '-type'             => 'aligned',
            '-accession_number' => $accession{$name},
        );

        $aln->add_seq($seq);
        $last_end = $end;
    }

    # If the last end is <= 0, we have either reached the end of the
    # stream without reading a sequence or we have encountered some
    # other error; callers receive undef in that case.
    undef $aln if $last_end <= 0;

    return $aln;
}
139
140
=head2 write_aln

 Title   : write_aln
 Usage   : $stream->write_aln(@aln)
 Function: Writes each alignment in @aln to the stream in selex format:
           one "<name> <sequence>" line per sequence, with the names of
           an alignment padded so that its sequences start in the same
           column
 Returns : 1 for success; an empty return (false) as soon as a line
           cannot be printed
 Args    : one or more L<Bio::Align::AlignI> objects

=cut

sub write_aln {
    my $self       = shift;
    my @alignments = @_;

    for my $alignment (@alignments) {
        # Longest display name in this alignment; sets the column width
        my $width = $alignment->maxdisplayname_length();

        for my $member ( $alignment->each_seq() ) {
            my $label = $alignment->displayname( $member->get_nse() );

            # Pad to the longest name plus two spaces
            my $padded = $label . ( " " x ( $width - length($label) + 2 ) );

            # Stop at the first failed write; nothing is flushed then
            $self->_print( sprintf( "%s %s\n", $padded, $member->seq() ) )
              or return;
        }
    }

    # Flush only when the stream asks for it and has a filehandle
    if ( $self->_flush_on_write && defined $self->_fh ) {
        $self->flush;
    }

    return 1;
}
168
169 1;