annotate variant_effect_predictor/Bio/SeqIO/gcg.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: gcg.pm,v 1.21 2002/10/25 16:22:01 jason Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl module for Bio::SeqIO::gcg
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Cared for by Ewan Birney <birney@ebi.ac.uk>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 # and Lincoln Stein <lstein@cshl.org>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 # Copyright Ewan Birney & Lincoln Stein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 # _history
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 # October 18, 1999 Largely rewritten by Lincoln Stein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 Bio::SeqIO::gcg - GCG sequence input/output stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Do not use this module directly. Use it via the Bio::SeqIO class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 This object can transform Bio::Seq objects to and from GCG flat
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 file databases.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 User feedback is an integral part of the evolution of this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 and other Bioperl modules. Send your comments and suggestions preferably
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 to one of the Bioperl mailing lists.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 http://www.bioperl.org/MailList.shtml - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 of the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 =head1 AUTHORS - Ewan Birney & Lincoln Stein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 Email: E<lt>birney@ebi.ac.ukE<gt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 E<lt>lstein@cshl.orgE<gt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 =head1 CONTRIBUTORS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 Jason Stajich, jason@bioperl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 The rest of the documentation details each of the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 methods. Internal methods are usually preceded with a _
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 # Let the code begin...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 package Bio::SeqIO::gcg;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 use Bio::SeqIO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 use Bio::Seq::SeqFactory;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 @ISA = qw(Bio::SeqIO);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Finish construction of the stream object.
#
# Calls the parent class's _initialize with the same arguments, then makes
# sure a sequence factory is set: when sequence_factory() returns undef, a
# Bio::Seq::SeqFactory building 'Bio::Seq::RichSeq' objects (and sharing this
# stream's verbosity) is installed.
#
# Args : the constructor argument list, forwarded to SUPER::_initialize
sub _initialize {
    my ($self, @args) = @_;
    $self->SUPER::_initialize(@args);

    if ( !defined $self->sequence_factory ) {
        # Direct Class->new(...) call instead of the indirect-object form
        # "new Class (...)", which perl can misparse.
        $self->sequence_factory(
            Bio::Seq::SeqFactory->new(
                -verbose => $self->verbose(),
                -type    => 'Bio::Seq::RichSeq'
            )
        );
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 =head2 next_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 Title : next_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 Usage : $seq = $stream->next_seq()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 Function: returns the next sequence in the stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 Returns : Bio::Seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Read one GCG record from the stream and build a sequence object from it.
#
# Every line before the header line (the first line ending in '..') is
# collected as the description. ID, length/date, type and checksum are pulled
# from the header line; all remaining lines of the stream are treated as
# sequence data, from which everything but ASCII letters is stripped (case is
# kept).
#
# Args    : none used
# Returns : the object produced by sequence_factory->create(), or nothing
#           (bare return) when the input ends before a header line is seen
# Throws  : if a second line ending in '..' is met while reading residues,
#           or if the header carried a checksum that does not validate
sub next_seq {
    my ($self, @args) = @_;
    my ($id, $type, $desc, $chksum, $sequence, $date);

    # A lexical line buffer is used on purpose: reading into the global $_
    # would overwrite the caller's $_ (and, inside a caller's for/map, the
    # list element it is aliased to).
    my $header_seen = 0;
    while ( defined( my $line = $self->_readline() ) ) {

        ## Get the descriptive info (anything before the line with '..')
        unless ( $line =~ /\.\.$/ ) {
            $desc .= $line;
            next;
        }

        ## Pull ID, Checksum & Type from the line containing '..'
        chomp $line;
        if ( $line =~ /Check\:\s(\d+)\s/ ) { $chksum = $1; }
        if ( $line =~ /Type:\s(\w)\s/ )    { $type   = $1; }
        if ( $line =~ /(\S+)\s+Length/ )   { $id     = $1; }
        if ( $line =~ /Length:\s+(\d+)\s+(\S.+\S)\s+Type/ ) { $date = $2; }
        $header_seen = 1;
        last;
    }
    return unless $header_seen;

    # Remove the last "\n"; $desc is still undef when the header was the
    # very first line of the record.
    chomp($desc) if defined $desc;

    ## This is where we grab the sequence info.
    while ( defined( my $line = $self->_readline() ) ) {
        if ( $line =~ /\.\.$/ ) {
            $self->throw("Looks like start of another sequence. See documentation. ");
        }

        next if $line eq "\n";       ## skip whitespace lines in formatted seq
        $line =~ s/[^a-zA-Z]//g;     ## remove anything that is not alphabet char
        $sequence .= $line;          ## case is deliberately kept
    }

    ## If we parsed out a checksum, we might as well test it
    if ( defined $chksum ) {
        unless ( _validate_checksum($sequence, $chksum) ) {
            $self->throw("Checksum failure on parsed sequence.");
        }
    }

    ## Remove whitespace from identifier because the constructor
    ## will throw a warning otherwise...
    if ( defined $id ) { $id =~ s/\s+//g; }

    ## Turn our parsed "Type: N" or "Type: P" (if found) into the appropriate
    ## keyword that the constructor expects...
    if ( defined $type ) {
        if ( $type eq "N" ) { $type = "dna"; }
        if ( $type eq "P" ) { $type = "prot"; }
    }

    return $self->sequence_factory->create(
        -seq   => $sequence,
        -id    => $id,
        -desc  => $desc,
        -type  => $type,
        -dates => [ $date ]
    );
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 =head2 write_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 Title : write_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 Usage : $stream->write_seq(@seq)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 Function: writes the formatted $seq object into the stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 Returns : 1 for success and 0 for error
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 Args : array of Bio::PrimarySeqI object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Write sequences to the stream in GCG format.
#
# For every sequence this emits the description line, the header line
# ("ID Length: LEN DATE Type: N|P Check: SUM .."), and then the residues in
# numbered rows made of up to five groups of ten characters.
#
# Args    : list of Bio::PrimarySeqI objects
# Returns : 1 on success; nothing (bare return) as soon as _print() fails
# Throws  : if an argument is not a Bio::PrimarySeqI object
sub write_seq {
    my ($self, @seq) = @_;

    for my $seq (@seq) {
        $self->throw("Did not provide a valid Bio::PrimarySeqI object")
            unless defined $seq && ref($seq) && $seq->isa('Bio::PrimarySeqI');

        my $str = $seq->seq;
        $str = '' unless defined $str;
        my $comment = $seq->desc;
        $comment = '' unless defined $comment;
        my $id   = $seq->id;
        my $type = ( $seq->alphabet() =~ /[dr]na/i ) ? 'N' : 'P';

        # Use the first stored date when the object offers one; otherwise
        # (no get_dates method, or no date recorded) stamp the record with
        # the current local time so the header never has an empty date field.
        my $timestamp;
        if ( $seq->can('get_dates') ) {
            ($timestamp) = $seq->get_dates;
        }
        $timestamp = localtime(time) unless defined $timestamp;

        my $len = length($str);
        ## Set the offset if we have any non-standard numbering going on
        my $offset = 1;
        # checksum
        my $sum = $self->GCG_checksum($seq);

        # Output the sequence header info
        my @out = (
            "$comment\n",
            "$id Length: $len $timestamp Type: $type Check: $sum ..\n\n",
        );

        # Format the sequence: each row starts with the 1-based position of
        # its first residue, holds five space-separated groups of ten, and a
        # completed row is closed with a blank line.
        my $row = '';
        my $pos = 0;
        while ( $pos < $len ) {
            if ( $pos % 50 == 0 ) {
                $row = sprintf("%8d ", ($pos + $offset));    # numbering
            }
            $row .= substr($str, $pos, 10);
            $pos += 10;
            if ( $pos < $len && $pos % 50 != 0 ) {
                $row .= " ";
            }
            elsif ( $pos % 50 == 0 ) {
                push @out, $row . "\n\n";
                $row = '';
            }
        }

        # Close a partially filled last row, then add the record's trailing
        # newline (emitted even when the last row was already closed).
        $row .= "\n" if $pos % 50 != 0;
        push @out, $row . "\n";

        return unless $self->_print(@out);
    }

    $self->flush if $self->_flush_on_write && defined $self->_fh;
    return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 =head2 GCG_checksum
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 Title : GCG_checksum
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 Usage : $cksum = $gcgio->GCG_checksum($seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 Function : returns a gcg checksum for the sequence specified
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 This method can also be called as a class method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 Returns : a GCG checksum string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 Argument : a Bio::PrimarySeqI implementing object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Compute the GCG checksum of a sequence object.
#
# The residues are upper-cased and split into single characters, with runs of
# '.' and '-' acting as separators. Each resulting field is weighted by its
# running position, cycling 1..57, multiplied by its character code, and the
# total is reduced modulo 10000. An empty field (as produced by a leading gap
# run) contributes 0 but still advances the position.
#
# Args    : $seqobj - object whose seq() method returns the residue string
# Returns : integer checksum in the range 0..9999
sub GCG_checksum {
    my ($self, $seqobj) = @_;

    (my $residues = $seqobj->seq()) =~ tr/a-z/A-Z/;

    my $total    = 0;
    my $position = 0;
    for my $symbol ( split(/[\.\-]*/, $residues) ) {
        # Weight runs 1, 2, ... 57 and then starts over at 1.
        my $weight = ( $position++ % 57 ) + 1;
        my $code   = unpack("c", $symbol) || 0;
        $total += $weight * $code;
    }

    return $total % 10000;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 =head2 _validate_checksum
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 Title : _validate_checksum
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 Usage : n/a - internal method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 Function: if parsed gcg sequence contains a checksum field
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 : we compare it to a value computed here on the parsed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 : sequence. A checksum mismatch would indicate some
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 : type of parsing failure occurred.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 Returns : 1 for success, 0 for failure
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 Args : string containing parsed seq, value of parsed checksum
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Check a parsed sequence against the checksum found in its GCG header.
#
# The checksum is the sum, modulo 10000, of each character code multiplied by
# a weight that cycles 1..57 along the sequence. It is computed on an
# upper-cased copy of the sequence, as GCG_checksum does: the parser keeps
# the residues' original case, so without this a lower-case record could
# never validate.
#
# Plain function, not a method.
# Args    : $seq        - string containing the parsed sequence
#           $parsed_sum - checksum value read from the header
# Returns : 1 if the computed checksum equals $parsed_sum, 0 otherwise
sub _validate_checksum {
    my ($seq, $parsed_sum) = @_;

    $seq = '' unless defined $seq;
    $seq =~ tr/a-z/A-Z/;    # works on the local copy only

    # Generate the GCG checksum value
    my $computed_sum = 0;
    my $weight       = 0;
    for my $i ( 0 .. length($seq) - 1 ) {
        $weight++;
        $computed_sum += $weight * ord( substr($seq, $i, 1) );
        $weight = 0 if $weight == 57;
    }
    $computed_sum %= 10000;

    # Compare and decide if success or failure
    return ( $parsed_sum == $computed_sum ) ? 1 : 0;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 1;