annotate variant_effect_predictor/Bio/PrimarySeq.pm @ 2:a5976b2dce6f

changing default values for ensembl database
author mahtabm
date Thu, 11 Apr 2013 17:15:42 +1000
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: PrimarySeq.pm,v 1.73.2.1 2003/06/29 00:25:27 jason Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # bioperl module for Bio::PrimarySeq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Cared for by Ewan Birney <birney@sanger.ac.uk>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright Ewan Birney
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 Bio::PrimarySeq - Bioperl lightweight Sequence Object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 # The Bio::SeqIO for file reading, Bio::DB::GenBank for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 # database reading
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 use Bio::Seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 use Bio::SeqIO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 use Bio::DB::GenBank;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 #make from memory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 $seqobj = Bio::PrimarySeq->new ( -seq => 'ATGGGGTGGGCGGTGGGTGGTTTG',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 -id => 'GeneFragment-12',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 -accession_number => 'X78121',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 -alphabet => 'dna',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 -is_circular => 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 print "Sequence ", $seqobj->id(), " with accession ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 $seqobj->accession_number, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 # read from file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 $inputstream = Bio::SeqIO->new(-file => "myseq.fa",-format => 'Fasta');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 $seqobj = $inputstream->next_seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 print "Sequence ", $seqobj->id(), " and desc ", $seqobj->desc, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 # to get out parts of the sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 print "Sequence ", $seqobj->id(), " with accession ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 $seqobj->accession_number, " and desc ", $seqobj->desc, "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 $string = $seqobj->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 $string2 = $seqobj->subseq(1,40);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 PrimarySeq is a lightweight Sequence object, storing little more than
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 the sequence, its name, a computer useful unique name. It does not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 contain sequence features or other information. To have a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 with sequence features you should use the Seq object which uses this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 object - go perldoc Bio::Seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 Although new users will use Bio::PrimarySeq a lot, in general you will
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 be using it from the Bio::Seq object. For more information on Bio::Seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 go perldoc Bio::Seq. For interest you might like to know that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 Bio::Seq has-a Bio::PrimarySeq and forwards most of the function calls
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 to do with sequence to it (the has-a relationship lets us get out of an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 otherwise nasty cyclical reference in Perl which would leak memory).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 Sequence objects are defined by the Bio::PrimarySeqI interface, and this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 object is a pure Perl implementation of the interface (if that's
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 gibberish to you, don't worry. The take home message is that this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 object is the bioperl default sequence object, but other people can
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 use their own objects as sequences if they so wish). If you are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 interested in wrapping your own objects as compliant Bioperl sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 objects, then you should read the Bio::PrimarySeqI documentation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 The documentation of this object is a merge of the Bio::PrimarySeq and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 Bio::PrimarySeqI documentation. This lists all the methods which you can
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 call on sequence objects here.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 Bioperl modules. Send your comments and suggestions preferably to one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 of the Bioperl mailing lists. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 http://bio.perl.org/MailList.html - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 of the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 =head1 AUTHOR - Ewan Birney
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 Email birney@sanger.ac.uk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 Describe contact details here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 The rest of the documentation details each of the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 methods. Internal methods are usually preceded with a _
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 # Let the code begin...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 package Bio::PrimarySeq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 use Bio::Root::Root;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 use Bio::PrimarySeqI;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 use Bio::IdentifiableI;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 use Bio::DescribableI;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 @ISA = qw(Bio::Root::Root Bio::PrimarySeqI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 Bio::IdentifiableI Bio::DescribableI);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 # setup the allowed values for alphabet()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Lookup table of the alphabet names accepted by alphabet(); every key maps to 1.
my %valid_type = map { ($_ => 1) } qw(dna rna protein);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 Title : new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 Usage : $seq = Bio::PrimarySeq->new( -seq => 'ATGGGGGTGGTGGTACCCT',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 -id => 'human_id',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 -accession_number => 'AL000012',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 Function: Returns a new primary seq object from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 basic constructors, being a string for the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 and strings for id and accession_number.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 Note that you can provide an empty sequence string. However, in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 this case you MUST specify the type of sequence you wish to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 initialize by the parameter -alphabet. See alphabet() for possible
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 values.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 Returns : a new Bio::PrimarySeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 Args : -seq => sequence string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 -display_id => display id of the sequence (locus name)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 -accession_number => accession number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 -primary_id => primary id (Genbank id)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 -namespace => the namespace for the accession
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 -authority => the authority for the namespace
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 -desc => description text
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 -alphabet => sequence type (alphabet) (dna|rna|protein)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 -id => alias for display id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 -is_circular => boolean field for whether or not sequence is circular
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Construct a new Bio::PrimarySeq from named arguments.
#
# Recognised arguments (extracted with _rearrange): -seq, -display_id,
# -accession_number, -primary_id, -namespace, -authority, -version,
# -object_id, -desc, -alphabet, -id (alias for -display_id), -is_circular,
# -direct, -ref_to_seq and -length.
#
# Throws (via throw()) when both -id and -display_id are supplied with
# different values.  Returns the newly built object.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    my ($seq, $id, $acc, $pid, $ns, $auth, $v, $oid,
        $desc, $alphabet, $given_id, $is_circular, $direct, $ref_to_seq, $len) =
        $self->_rearrange([qw(SEQ
                              DISPLAY_ID
                              ACCESSION_NUMBER
                              PRIMARY_ID
                              NAMESPACE
                              AUTHORITY
                              VERSION
                              OBJECT_ID
                              DESC
                              ALPHABET
                              ID
                              IS_CIRCULAR
                              DIRECT
                              REF_TO_SEQ
                              LENGTH
                              )],
                          @args);

    # -id is an alias for -display_id; supplying both is only an error when
    # they disagree.
    if (defined $id && defined $given_id) {
        if ($id ne $given_id) {
            $self->throw("Provided both id and display_id constructor ".
                         "functions. [$id] [$given_id]");
        }
    }
    if (defined $given_id) { $id = $given_id; }

    # Set the length before the seq -- if there is a sequence, this length
    # is going to be invalidated.
    defined $len && $self->length($len);

    # If an alphabet is provided we set it first, so that it won't be
    # guessed when the sequence is set.
    $alphabet && $self->alphabet($alphabet);

    # If -direct and -ref_to_seq are both passed in, store the referenced
    # string as-is (seq() is bypassed) and only guess the alphabet when
    # none was given.
    if ($direct && $ref_to_seq) {
        $self->{'seq'} = $$ref_to_seq;
        if (!$alphabet) {
            $self->_guess_alphabet();
        } # else it has been set already above
    }
    else {
        # note: the sequence string may be empty
        $self->seq($seq) if defined($seq);
    }

    # Textual attributes are applied when defined and non-empty.  A plain
    # truth test would silently discard a legitimate value of "0".
    # CORE::length is spelled out because this package has its own length().
    my $has_text = sub { return defined $_[0] && CORE::length($_[0]) > 0 };

    $has_text->($id)   && $self->display_id($id);
    $has_text->($acc)  && $self->accession_number($acc);
    defined $pid       && $self->primary_id($pid);
    $has_text->($desc) && $self->desc($desc);
    $is_circular       && $self->is_circular($is_circular);
    $has_text->($ns)   && $self->namespace($ns);
    $has_text->($auth) && $self->authority($auth);
    defined($v)        && $self->version($v);
    defined($oid)      && $self->object_id($oid);

    return $self;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Store a sequence string directly in the object's 'seq' slot; none of the
# checks or bookkeeping of seq() are performed here.
#
# Args    : the new sequence value (optional)
# Returns : the value just stored, or undef when no value was passed
sub direct_seq_set {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'seq'} = $new_value[0];
        return $self->{'seq'};
    }

    # Explicit undef (not a bare return) so that list-context callers keep
    # receiving a one-element list.
    return undef;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 =head2 seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 Title : seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 Usage : $string = $obj->seq()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 Function: Returns the sequence as a string of letters. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 case of the letters is left up to the implementer.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 Suggested cases are upper case for proteins and lower case for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 DNA sequence (IUPAC standard), but you should not rely on this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 Args : Optionally on set the new value (a string). An optional second
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 argument presets the alphabet (otherwise it will be guessed).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 Both parameters may also be given in named parameter style
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 with -seq and -alphabet being the names.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Get or set the sequence string.
#
# With no arguments the stored sequence is returned unchanged.  Otherwise
# the first argument is the new sequence and the optional second argument
# the alphabet; both are taken positionally from the argument list.
#
# Throws (via throw()) if a defined new value fails validate_seq().
# Returns the stored sequence.
sub seq {
    my ($obj, @args) = @_;

    # Plain getter.
    return $obj->{'seq'} unless @args;

    my ($new_seq, $new_alphabet) = @args;

    if (defined($new_seq) && !$obj->validate_seq($new_seq)) {
        $obj->throw("Attempting to set the sequence to [$new_seq] which does not look healthy");
    }

    # A "change" means a sequence slot already existed and the new value is
    # non-empty.  An empty or undef value is not considered a change (there
    # would be nothing to guess an alphabet from anyway).
    my $candidate        = $new_seq || '';
    my $replaces_old_seq = exists($obj->{'seq'}) && CORE::length($candidate) > 0;

    $obj->{'seq'} = $new_seq;

    if ($new_alphabet) {
        # An explicitly supplied alphabet always wins.
        $obj->alphabet($new_alphabet);
    }
    elsif ($replaces_old_seq || !defined($obj->alphabet())) {
        # Either the sequence changed or no alphabet is known yet:
        # guess the (possibly new) alphabet.
        $obj->_guess_alphabet();
    }
    # Otherwise the sequence did not change and an alphabet is already set.

    # A changed sequence invalidates any explicitly set length.
    if ($replaces_old_seq) {
        $obj->length(undef);
    }

    return $obj->{'seq'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 =head2 validate_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 Title : validate_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 Usage : if(! $seq->validate_seq($seq_str) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 print "sequence $seq_str is not valid for an object of type ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 ref($seq), "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 Function: Validates a given sequence string. A validating sequence string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 must be accepted by seq(). A string that does not validate will
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 lead to an exception if passed to seq().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 The implementation provided here does not take alphabet() into
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 account. Allowed are all letters (A-Z) and '-','.', '*' and '?'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 Returns : 1 if the supplied sequence string is valid for the object, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 0 otherwise.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 Args : The sequence string to be validated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Check whether a sequence string contains only allowed symbols: the
# letters A-Z / a-z plus '-', '.', '*' and '?'.  The alphabet is not
# consulted.
#
# Args    : the string to validate; when omitted (or undef) the object's
#           current seq() is validated instead
# Returns : 1 if the string is valid (an empty string counts as valid),
#           0 otherwise, including when there is no defined string to check
# Side effect: on failure a warning listing the offending character runs is
#           issued through warn()
sub validate_seq {
    my ($self, $seqstr) = @_;

    if (!defined $seqstr) { $seqstr = $self->seq(); }
    return 0 unless defined $seqstr;

    if ((CORE::length($seqstr) > 0) && ($seqstr !~ /^([A-Za-z\-\.\*\?]+)$/)) {
        # The match must run in list context to collect every offending run;
        # concatenating it directly evaluates it in scalar context and only
        # yields a true value ("1") in the message.
        my @mismatches = ($seqstr =~ /([^A-Za-z\-\.\*\?]+)/g);
        $self->warn("seq doesn't validate, mismatch is " .
                    join(",", @mismatches));
        return 0;
    }
    return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 =head2 subseq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 Title : subseq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 Usage : $substring = $obj->subseq(10,40);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 Function: returns the subseq from start to end, where the first base
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 is 1 and the number is inclusive, ie 1-2 are the first two
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 bases of the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 Returns : a string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 Args : integer for start position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 integer for end position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 OR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 Bio::LocationI location for subseq (strand honored)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Return part of the sequence.
#
# Args    : start and end positions (1-based, inclusive), OR an object
#           that isa Bio::LocationI, in which case the pieces for each
#           sub-location are concatenated and pieces on a negative strand
#           are reverse-complemented.  An optional trailing argument is a
#           replacement string handed to substr().
# Returns : the requested substring
# Throws  : (via throw()) if start > end, if start <= 0, or if end exceeds
#           the sequence length
# When the arguments fit neither calling form a warning is issued through
# warn() and its result is returned.
sub subseq {
    my ($self, $start, $end, $replace) = @_;

    if (ref($start) && $start->isa('Bio::LocationI')) {
        my $loc = $start;
        $replace = $end; # do we really use this anywhere? scary. HL
        my $seq = "";
        foreach my $subloc ($loc->each_Location()) {
            my $piece = $self->subseq($subloc->start(),
                                      $subloc->end(), $replace);
            if ($subloc->strand() < 0) {
                $piece = Bio::PrimarySeq->new('-seq' => $piece)->revcom()->seq();
            }
            $seq .= $piece;
        }
        return $seq;
    }
    elsif (defined $start && defined $end) {
        if ($start > $end) {
            # start == end is a valid single-symbol request, so the
            # requirement is "less than or equal to".
            $self->throw("in subseq, start [$start] has to be ".
                         "less than or equal to end [$end]");
        }
        if ($start <= 0 || $end > $self->length) {
            $self->throw("You have to have start positive\n\tand length less ".
                         "than the total length of sequence [$start:$end] ".
                         "Total ".$self->length."");
        }

        # remove one from start, and then length is end-start
        $start--;
        if (defined $replace) {
            # NOTE(review): the replacement is applied to the string returned
            # by seq(); whether it reaches the stored sequence is not evident
            # from this block -- confirm before relying on it.
            return substr($self->seq(), $start, ($end - $start), $replace);
        }
        else {
            return substr($self->seq(), $start, ($end - $start));
        }
    }
    else {
        $self->warn("Incorrect parameters to subseq - must be two integers ".
                    "or a Bio::LocationI object not ($start,$end)");
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 =head2 length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 Title : length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 Usage : $len = $seq->length();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 Function: Get the length of the sequence in number of symbols (bases
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 or amino acids).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 You can also set this attribute, even to a number that does
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 not match the length of the sequence string. This is useful
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 if you don't want to set the sequence too, or if you want
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 to free up memory by unsetting the sequence. In the latter
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 case you could do e.g.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 $seq->length($seq->length);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 $seq->seq(undef);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 Note that if you set the sequence to a value other than
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 undef at any time, the length attribute will be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 invalidated, and the length of the sequence string will be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 reported again. Also, we won't let you lie about the length.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 Returns : integer representing the length of the sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 Args : Optionally, the value on set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 sub length {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 my $len = CORE::length($self->seq() || '');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 if(@_) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 my $val = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 if(defined($val) && $len && ($len != $val)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 $self->throw("You're trying to lie about the length: ".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 "is $len but you say ".$val);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 $self->{'_seq_length'} = $val;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 } elsif(defined($self->{'_seq_length'})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 return $self->{'_seq_length'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 return $len;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 =head2 display_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 Title : display_id or display_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 Usage : $id_string = $obj->display_id();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 Function: returns the display id, aka the common name of the Sequence object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 The semantics of this is that it is the most likely string to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 be used as an identifier of the sequence, and likely to have
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 "human" readability. The id is equivalent to the ID field of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 the GenBank/EMBL databanks and the id field of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 Swissprot/sptrembl database. In fasta format, the >(\S+) is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 presumed to be the id, though some people overload the id to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 embed other information. Bioperl does not use any embedded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 information in the ID field, and people are encouraged to use
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 other mechanisms (accession field for example, or extending
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 the sequence object) to solve this.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 With the new Bio::DescribableI interface, display_name aliases
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 to this method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 Returns : A string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 Args : None to get; optionally a string to set the display id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451 sub display_id {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 my ($obj,$value) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 if( defined $value) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 $obj->{'display_id'} = $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 return $obj->{'display_id'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 =head2 accession_number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 Title : accession_number or object_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 Usage : $unique_key = $obj->accession_number;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 Function: Returns the unique biological id for a sequence, commonly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 called the accession_number. For sequences from established
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 databases, the implementors should try to use the correct
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 accession number. Notice that primary_id() provides the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 unique id for the implementation, allowing multiple objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 to have the same accession number in a particular implementation.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 For sequences with no accession number, this method should
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 return "unknown".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 [Note this method name is likely to change in 1.3]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 With the new Bio::IdentifiableI interface, this is aliased
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 to object_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 Returns : A string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 Args : A string (optional) for setting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 sub accession_number {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 my( $obj, $acc ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 if (defined $acc) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 $obj->{'accession_number'} = $acc;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 $acc = $obj->{'accession_number'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 $acc = 'unknown' unless defined $acc;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 return $acc;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 =head2 primary_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 Title : primary_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 Usage : $unique_key = $obj->primary_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 Function: Returns the unique id for this object in this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 implementation. This allows implementations to manage their
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 own object ids in a way the implementation can control;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 clients can expect one id to map to one object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 For sequences with no natural primary id, this method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 should return a stringified memory location.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 Returns : A string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 Args : A string (optional, for setting)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 sub primary_id {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 my ($obj,$value) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 if( defined $value) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 $obj->{'primary_id'} = $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 if( ! exists $obj->{'primary_id'} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 return "$obj";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 return $obj->{'primary_id'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 =head2 alphabet
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 Title : alphabet
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 Function: Returns the type of sequence being one of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 'dna', 'rna' or 'protein'. This is case sensitive.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 This is not called <type> because this would cause
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 upgrade problems from the 0.5 and earlier Seq objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536 Returns : a string either 'dna','rna','protein'. NB - the object must
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 make a call of the type - if there is no type specified it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 has to guess.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 Args : none to get; optionally a string to set (lowercased and checked against the valid types)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 sub alphabet {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 my ($obj,$value) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 if (defined $value) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547 $value = lc $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 unless ( $valid_type{$value} ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 $obj->throw("Molecular type '$value' is not a valid type (".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 join(',', map "'$_'", sort keys %valid_type) .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 ") lowercase");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 $obj->{'alphabet'} = $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 return $obj->{'alphabet'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 =head2 desc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560 Title : desc or description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 Usage : $obj->desc($newval)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 Function: Get/set description of the sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 description is an alias for this for compliance with the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 Bio::DescribableI interface.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568 Returns : value of desc (a string)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569 Args : newvalue (a string or undef, optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574 sub desc{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 return $self->{'desc'} = shift if @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578 return $self->{'desc'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 =head2 can_call_new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 Title : can_call_new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 Usage :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 Returns : true
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 sub can_call_new {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600 =head2 id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 Title : id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603 Usage : $id = $seq->id()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 Function: This is mapped on display_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606 Returns :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612 sub id {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613 return shift->display_id(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 =head2 is_circular
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 Title : is_circular
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619 Usage : if( $obj->is_circular) { /Do Something/ }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 Function: Returns true if the molecule is circular
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621 Returns : Boolean value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 sub is_circular{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 return $self->{'is_circular'} = shift if @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629 return $self->{'is_circular'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 =head1 Methods for Bio::IdentifiableI compliance
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 =head2 object_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 Title : object_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 Usage : $string = $obj->object_id()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640 Function: a string which represents the stable primary identifier
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 in this namespace of this object. For DNA sequences this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642 is its accession_number, similarly for protein sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644 This is aliased to accession_number().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650 sub object_id {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651 return shift->accession_number(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654 =head2 version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656 Title : version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657 Usage : $version = $obj->version()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658 Function: a number which differentiates between versions of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 the same object. Higher numbers are considered to be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660 later and more relevant, but a single object described by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661 the same identifier should represent the same concept
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663 Returns : A number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667 sub version{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668 my ($self,$value) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669 if( defined $value) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670 $self->{'_version'} = $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672 return $self->{'_version'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676 =head2 authority
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 Title : authority
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 Usage : $authority = $obj->authority()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 Function: a string which represents the organisation which
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681 granted the namespace, written as the DNS name for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 organisation (eg, wormbase.org)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688 sub authority {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689 my ($obj,$value) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690 if( defined $value) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 $obj->{'authority'} = $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693 return $obj->{'authority'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696 =head2 namespace
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698 Title : namespace
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 Usage : $string = $obj->namespace()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700 Function: A string representing the name space this identifier
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 is valid in, often the database name or the name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702 describing the collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709 sub namespace{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 my ($self,$value) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711 if( defined $value) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712 $self->{'namespace'} = $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714 return $self->{'namespace'} || "";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717 =head1 Methods for Bio::DescribableI compliance
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 This comprises display_name and description.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723 =head2 display_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725 Title : display_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 Usage : $string = $obj->display_name()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 Function: A string which is what should be displayed to the user
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 the string should have no spaces (ideally, though a cautious
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729 user of this interface would not assume this) and should be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730 less than thirty characters (though again, double checking
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731 this is a good idea)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733 This is aliased to display_id().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738 sub display_name {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739 return shift->display_id(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742 =head2 description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744 Title : description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745 Usage : $string = $obj->description()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746 Function: A text string suitable for displaying to the user a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747 description. This string is likely to have spaces, but
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748 should not have any newlines or formatting - just plain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749 text. The string should not be greater than 255 characters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750 and clients can feel justified at truncating strings at 255
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751 characters for the purposes of display
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753 This is aliased to desc().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 sub description {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759 return shift->desc(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 =head1 Methods Inherited from Bio::PrimarySeqI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 These methods are available on Bio::PrimarySeq, although they are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765 actually implemented on Bio::PrimarySeqI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767 =head2 revcom
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769 Title : revcom
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770 Usage : $rev = $seq->revcom()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 Function: Produces a new Bio::SeqI implementing object which
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772 is the reversed complement of the sequence. For protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 sequences this throws an exception of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 "Sequence is a protein. Cannot revcom"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776 The id is the same id as the original sequence, and the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777 accession number is also identical. If someone wants to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778 track that this sequence has been reversed, it needs to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779 define its own extensions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781 To do an inplace edit of an object you can go:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 $seqobj = $seqobj->revcom();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785 This, of course, causes Perl to handle the garbage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786 collection of the old object, but it is roughly speaking as
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787 efficient as an inplace edit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 Returns : A new (fresh) Bio::SeqI object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 =head2 trunc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796 Title : trunc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797 Usage : $subseq = $myseq->trunc(10,100);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798 Function: Provides a truncation of a sequence,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801 Returns : a fresh Bio::SeqI implementing object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807 =head1 Internal methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809 These are internal methods to PrimarySeq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813 =head2 _guess_alphabet
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815 Title : _guess_alphabet
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816 Usage :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
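817 Function: Guesses the alphabet (dna, rna or protein) from the residue composition of the sequence and sets it on the object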
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819 Returns : The guessed alphabet as a string: 'dna', 'rna' or 'protein'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Guess the alphabet of the sequence from its residue composition.
#
# Gap/unknown symbols ('-', '.', '?') are removed before counting. If more
# than 85% of the remaining characters are A/T/G/C/N (either case) the guess
# is 'dna'; otherwise, if also counting U/u brings the fraction above 85%,
# the guess is 'rna'; anything else is 'protein'.
#
# Side effect: the guess is stored through $self->alphabet().
# Returns    : the guessed alphabet string ('dna', 'rna' or 'protein').
# Throws     : via $self->throw() when no characters remain after stripping.
sub _guess_alphabet {
    my ($self) = @_;

    my $str = $self->seq();
    $str = '' unless defined $str;

    # Delete every gap/unknown symbol individually. The former pattern
    # s/\-\.\?//g only matched the literal three-character run "-.?", so
    # ordinary gapped sequences were counted with their gaps included.
    $str =~ tr/\-.?//d;

    my $total = CORE::length($str);
    if ( $total == 0 ) {
        $self->throw("Got a sequence with no letters in - ".
                     "cannot guess alphabet [$str]");
    }

    # tr/// with an empty replacement list only counts matching characters.
    my $u    = ( $str =~ tr/Uu// );
    my $atgc = ( $str =~ tr/ATGCNatgcn// );

    my $type;
    if ( ( $atgc / $total ) > 0.85 ) {
        $type = 'dna';
    } elsif ( ( ( $atgc + $u ) / $total ) > 0.85 ) {
        $type = 'rna';
    } else {
        $type = 'protein';
    }

    $self->alphabet($type);
    return $type;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
852
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
853 ############################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854 # aliases due to name changes or to compensate for our lack of consistency #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855 ############################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Deprecated alias kept for backwards compatibility: emits a deprecation
# warning through $self->warn() and then delegates to accession_number()
# with the remaining arguments, returning its result.
sub accession {
    my $self = shift;

    my $notice = sprintf(
        '%s::accession is deprecated, use accession_number() instead',
        ref($self)
    );
    $self->warn($notice);

    return $self->accession_number(@_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
866