annotate variant_effect_predictor/Bio/Seq.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: Seq.pm,v 1.76.2.2 2003/07/03 20:01:32 jason Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl module for Bio::Seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Cared for by Ewan Birney <birney@ebi.ac.uk>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright Ewan Birney
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 Bio::Seq - Sequence object, with features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 # This is the main sequence object in Bioperl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 # gets a sequence from a file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 $seqio = Bio::SeqIO->new( '-format' => 'embl' , -file => 'myfile.dat');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 $seqobj = $seqio->next_seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 # SeqIO can both read and write sequences; see Bio::SeqIO
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 # for more information and examples
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 # get from database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 $db = Bio::DB::GenBank->new();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 $seqobj = $db->get_Seq_by_acc('X78121');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 # make from strings in script
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 $seqobj = Bio::Seq->new( -display_id => 'my_id',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 -seq => $sequence_as_string);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 # gets sequence as a string from sequence object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 $seqstr = $seqobj->seq(); # actual sequence as a string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 $seqstr = $seqobj->subseq(10,50); # slice in biological coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 # retrieves information from the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 # features must implement Bio::SeqFeatureI interface
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 @features = $seqobj->get_SeqFeatures(); # just top level
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 foreach my $feat ( @features ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 print "Feature ",$feat->primary_tag," starts ",$feat->start," ends ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 $feat->end," strand ",$feat->strand,"\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 # features retain link to underlying sequence object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 print "Feature sequence is ",$feat->seq->seq(),"\n"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 # sequences may have a species
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 if( defined $seqobj->species ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 print "Sequence is from ",$seqobj->species->binomial," [",$seqobj->species->common_name,"]\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 # annotation objects are Bio::AnnotationCollectionI's
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 $ann = $seqobj->annotation(); # annotation object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 # 'reference' is one type of annotation you can get; others include
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 # 'comment' and 'dblink'. Look at Bio::Annotation::Collection for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 # more information
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 foreach my $ref ( $ann->get_Annotations('reference') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 print "Reference ",$ref->title,"\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 # you can get truncations, translations and reverse complements, these
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 # all give back Bio::Seq objects themselves, though currently with no
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 # features transferred
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 my $trunc = $seqobj->trunc(100,200);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 my $rev = $seqobj->revcom();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 # there are many options to translate - check out the docs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 my $trans = $seqobj->translate();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 # these functions can be chained together
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 my $trans_trunc_rev = $seqobj->trunc(100,200)->revcom->translate();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 A Seq object is a sequence with sequence features placed on it. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 Seq object contains a PrimarySeq object for the actual sequence and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 also implements its interface.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 In Bioperl we have 3 main players that people are going to use frequently:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 Bio::PrimarySeq - just the sequence and its names, nothing else.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 Bio::SeqFeatureI - a location on a sequence, potentially with a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 and annotation.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 Bio::Seq - A sequence and a collection of sequence features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 (an aggregate) with its own annotation.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 Although Bioperl is not tied heavily to file formats these distinctions do
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 map to file formats sensibly and for some bioinformaticians this might help:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 Bio::PrimarySeq - Fasta file of a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 Bio::SeqFeatureI - A single entry in an EMBL/GenBank/DDBJ feature table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 Bio::Seq - A single EMBL/GenBank/DDBJ entry
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 By having this split we avoid a lot of nasty circular references
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 (sequence features can hold a reference to a sequence without the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 holding a reference to the sequence feature). See L<Bio::PrimarySeq> and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 L<Bio::SeqFeatureI> for more information.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 Ian Korf really helped in the design of the Seq and SeqFeature system.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 =head1 EXAMPLES
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 A simple and fundamental block of code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 use Bio::SeqIO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 my $seqIOobj = Bio::SeqIO->new(-file=>"1.fa"); # create a SeqIO object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 my $seqobj = $seqIOobj->next_seq; # get a Seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 With the Seq object in hand one has access to a powerful set of Bioperl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 methods and Bioperl objects. This next script will take a file of sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 in EMBL format and create a file of the reverse-complemented sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 in Fasta format using Seq objects. It also prints out details about the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 exons it finds as sequence features in General Feature Format (GFF).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 use Bio::Seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 use Bio::SeqIO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 while((my $seqobj = $seqin->next_seq())) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 print "Seen sequence ",$seqobj->display_id,", start of seq ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 substr($seqobj->seq,0,10),"\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 if( $seqobj->alphabet eq 'dna') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 $rev = $seqobj->revcom;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 $id = $seqobj->display_id();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 $id = "$id.rev";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 $rev->display_id($id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 $seqout->write_seq($rev);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 foreach $feat ( $seqobj->get_SeqFeatures() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 if( $feat->primary_tag eq 'exon' ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 print STDOUT "Location ",$feat->start,":",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 $feat->end," GFF[",$feat->gff_string,"]\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 Let's examine the script. The lines below import the Bioperl modules.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 Seq is the main Bioperl sequence object and SeqIO is the Bioperl support
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 for reading sequences from files and writing them to files.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 use Bio::Seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 use Bio::SeqIO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 These two lines create two SeqIO streams: one for reading in sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 and one for outputting sequences:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 Notice that in the "$seqout" case there is a greater-than sign,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 indicating the file is being opened for writing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 Using the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 '-argument' => value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 syntax is common in Bioperl. The file argument is like an argument
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 to open() . You can also pass in filehandles or FileHandle objects by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 using the -fh argument (see L<Bio::SeqIO> documentation for details).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 Many formats in Bioperl are handled, including Fasta, EMBL, GenBank,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 Swissprot (swiss), PIR, and GCG.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 $seqin = Bio::SeqIO->new( -format => 'EMBL' , -file => 'myfile.dat');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 $seqout= Bio::SeqIO->new( -format => 'Fasta', -file => '>output.fa');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 This is the main loop which will loop progressively through sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 in a file, and each call to $seqin-E<gt>next_seq() provides a new Seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 object from the file:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 while((my $seqobj = $seqin->next_seq())) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 This print line below accesses fields in the Seq object directly. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 $seqobj-E<gt>display_id is the way to access the display_id attribute
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 of the Seq object. The $seqobj-E<gt>seq method gets the actual
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 sequence out as string. Then you can do manipulation of this if
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 you want to (there are however easy ways of doing truncation,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 reverse-complement and translation).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 print "Seen sequence ",$seqobj->display_id,", start of seq ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 substr($seqobj->seq,0,10),"\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 Bioperl has to guess the alphabet of the sequence, being either 'dna',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 'rna', or 'protein'. The alphabet attribute is one of these three
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 possibilities.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 if( $seqobj->alphabet eq 'dna') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 The $seqobj-E<gt>revcom method provides the reverse complement of the Seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 object as another Seq object. Thus, the $rev variable is a reference to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 another Seq object. For example, one could repeat the above print line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 for this Seq object (putting $rev in place of $seqobj). In this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 case we are going to output the object into the file stream we built
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 earlier on.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 $rev = $seqobj->revcom;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 When we output it, we want the id of the outputted object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 to be changed to "$id.rev", ie, with .rev on the end of the name. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 following lines retrieve the id of the sequence object, add .rev
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 to this and then set the display_id of the rev sequence object to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 this. Notice that to set the display_id attribute you just need
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 to call the same method, display_id(), with the new value as an argument.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 Getting and setting values with the same method is common in Bioperl.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 $id = $seqobj->display_id();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 $id = "$id.rev";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 $rev->display_id($id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 The write_seq method on the SeqIO output object, $seqout, writes the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 $rev object to the filestream we built at the top of the script.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 The filestream knows that it is outputting in fasta format, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 so it provides fasta output.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 $seqout->write_seq($rev);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 This block of code loops over sequence features in the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 object, looking for those that have been tagged as 'exon'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 Features have start and end attributes and can be outputted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 in General Feature Format (GFF), a standardized format for sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 foreach $feat ( $seqobj->get_SeqFeatures() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 if( $feat->primary_tag eq 'exon' ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 print STDOUT "Location ",$feat->start,":",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 $feat->end," GFF[",$feat->gff_string,"]\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 The code above shows how a few Bio::Seq methods suffice to read, parse,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 reformat and analyze sequences from a file. A full list of methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 available to Bio::Seq objects is shown below. Bear in mind that some of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 these methods come from PrimarySeq objects, which are simpler
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 than Seq objects, stripped of features (see L<Bio::PrimarySeq> for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 more information).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 # these methods return strings, and accept strings in some cases:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 $seqobj->seq(); # string of sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 $seqobj->subseq(5,10); # part of the sequence as a string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 $seqobj->accession_number(); # when there, the accession number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 $seqobj->alphabet(); # one of 'dna','rna',or 'protein'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 $seqobj->seq_version() # when there, the version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 $seqobj->keywords(); # when there, the Keywords line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 $seqobj->length() # length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 $seqobj->desc(); # description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 $seqobj->primary_id(); # a unique id for this sequence regardless
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 # of its display_id or accession number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 $seqobj->display_id(); # the human readable id of the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 Some of these values map to fields in common formats. For example, the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 display_id() method returns the LOCUS name of a Genbank entry,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 the (\S+) following the E<gt> character in a Fasta file, the ID from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 a SwissProt file, and so on. The desc() method will return the DEFINITION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 line of a Genbank file, the description following the display_id in a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 Fasta file, and the DE field in a SwissProt file.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 # the following methods return new Seq objects, but
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 # do not transfer features across to the new object:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 $seqobj->trunc(5,10) # truncation from 5 to 10 as new object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 $seqobj->revcom # reverse complements sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 $seqobj->translate # translation of the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 # if new() can be called this method returns 1, else 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 $seqobj->can_call_new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 # the following method determines if the given string will be accepted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 # by the seq() method - if the string is acceptable then validate_seq()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 # returns 1, or 0 if not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 $seqobj->validate_seq($string)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 # the following method returns or accepts a Species object:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 $seqobj->species();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 Please see L<Bio::Species> for more information on this object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 # the following method returns or accepts an Annotation object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 # which in turn allows access to Annotation::Reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 # and Annotation::Comment objects:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 $seqobj->annotation();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 These annotations typically refer to entire sequences, unlike
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 features. See L<Bio::AnnotationCollectionI>,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 L<Bio::Annotation::Collection>, L<Bio::Annotation::Reference>, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 L<Bio::Annotation::Comment> for details.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 It is also important to be able to describe defined portions of a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 sequence. The combination of some description and the corresponding
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 sub-sequence is called a feature - an exon and its coordinates within
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 a gene is an example of a feature, or a domain within a protein.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 # the following methods return an array of SeqFeatureI objects:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 $seqobj->get_SeqFeatures # The 'top level' sequence features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 $seqobj->get_all_SeqFeatures # All sequence features, including sub-seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 # features, such as features in an exon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 # to find out the number of features use:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 $seqobj->feature_count
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 Here are just some of the methods available to SeqFeatureI objects:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 # these methods return numbers:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 $feat->start # start position (1 is the first base)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 $feat->end # end position (2 is the second base)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 $feat->strand # 1 means forward, -1 reverse, 0 not relevant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 # these methods return or accept strings:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 $feat->primary_tag # the name of the sequence feature, eg
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 # 'exon', 'glycosylation site', 'TM domain'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 $feat->source_tag # where the feature comes from, eg, 'EMBL_GenBank',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 # or 'BLAST'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 # this method returns the more austere PrimarySeq object, not a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 # Seq object - the main difference is that PrimarySeq objects do not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 # themselves contain sequence features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 $feat->seq # the sequence between start,end on the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 # correct strand of the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 See L<Bio::PrimarySeq> for more details on PrimarySeq objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 # useful methods for feature comparisons, for start/end points
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 $feat->overlaps($other) # do $feat and $other overlap?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 $feat->contains($other) # is $other completely within $feat?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 $feat->equals($other) # do $feat and $other completely agree?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 # one can also add features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 $seqobj->add_SeqFeature($feat) # returns 1 if successful
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 $seqobj->add_SeqFeature(@features) # returns 1 if successful
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 # sub features. For complex join() statements, the feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 # is one sequence feature with many sub SeqFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 $feat->sub_SeqFeature # returns array of sub seq features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 Please see L<Bio::SeqFeatureI> and L<Bio::SeqFeature::Generic>,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 for more information on sequence features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 It is worth mentioning that one can also retrieve the start and end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 positions of a feature using a Bio::LocationI object:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 $location = $feat->location # $location is a Bio::LocationI object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 $location->start; # start position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 $location->end; # end position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 This is useful because one needs a Bio::Location::SplitLocationI object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 in order to retrieve the coordinates inside the Genbank or EMBL join()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 statements (e.g. "CDS join(51..142,273..495,1346..1474)"):
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 if ( $feat->location->isa('Bio::Location::SplitLocationI') &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 $feat->primary_tag eq 'CDS' ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 foreach $loc ( $feat->location->sub_Location ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 print $loc->start . ".." . $loc->end . "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 See L<Bio::LocationI> and L<Bio::Location::SplitLocationI> for more
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 information.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 =head1 Implemented Interfaces
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 This class implements the following interfaces.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 =item Bio::SeqI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 Note that this includes implementing Bio::PrimarySeqI.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 =item Bio::IdentifiableI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 =item Bio::DescribableI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 =item Bio::AnnotatableI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 =item Bio::FeatureHolderI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 Bioperl modules. Send your comments and suggestions preferably to one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 of the Bioperl mailing lists. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 http://bio.perl.org/MailList.html - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 Report bugs to the Bioperl bug tracking system to help us keep track of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 bioperl-bugs@bioperl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 =head1 AUTHOR - Ewan Birney, inspired by Ian Korf objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 Email birney@ebi.ac.uk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 =head1 CONTRIBUTORS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 Jason Stajich E<lt>jason@bioperl.orgE<gt>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 The rest of the documentation details each of the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 methods. Internal methods are usually preceded with a "_".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 # Let the code begin...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 package Bio::Seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 use vars qw(@ISA $VERSION);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 # Object preamble - inherits from Bio::Root::Root
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 use Bio::Root::Root;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 use Bio::SeqI;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 use Bio::Annotation::Collection;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 use Bio::PrimarySeq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 use Bio::IdentifiableI;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 use Bio::DescribableI;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 use Bio::AnnotatableI;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 use Bio::FeatureHolderI;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 $VERSION = '1.1';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 @ISA = qw(Bio::Root::Root Bio::SeqI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 Bio::IdentifiableI Bio::DescribableI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 Bio::AnnotatableI Bio::FeatureHolderI);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 Title : new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 Usage : $seq = Bio::Seq->new( -seq => 'ATGGGGGTGGTGGTACCCT',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 -id => 'human_id',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 -accession_number => 'AL000012',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 Function: Returns a new Seq object from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 basic constructors, being a string for the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 and strings for id and accession_number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 Returns : a new Bio::Seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 sub new { # constructor: builds the object and stores a Bio::PrimarySeq made from the same arguments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 my($caller,@args) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 if( $caller ne 'Bio::Seq') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 $caller = ref($caller) if ref($caller); # invoked on an object: use that object's class name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 # we know our inheritance hierarchy
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 my $self = Bio::Root::Root->new(@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 bless $self,$caller; # re-bless the base object into the requested class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 # this is way too sneaky probably. We delegate the construction of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 # the Seq object onto PrimarySeq and then pop primary_seq into
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 # our primary_seq slot
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 my $pseq = Bio::PrimarySeq->new(@args); # receives the full, unfiltered argument list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 # as we have just made this, we know it is ok to set hash directly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 # rather than going through the method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 $self->{'primary_seq'} = $pseq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 # setting this array is now delayed until the final
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 # moment, again speed ups for non feature containing things
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 # $self->{'_as_feat'} = [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 my ($ann, $pid,$feat,$species) = &Bio::Root::RootI::_rearrange($self,[qw(ANNOTATION PRIMARY_ID FEATURES SPECIES)], @args); # NOTE(review): _rearrange is defined elsewhere; presumably returns the -annotation, -primary_id, -features and -species argument values in that order - confirm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 # for a number of cases - reading fasta files - these are never set. This
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 # gives a quick optimisation around testing things later on
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 if( defined $ann || defined $pid || defined $feat || defined $species ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 $pid && $self->primary_id($pid); # truthiness guard: a defined but false value (0, '') is not stored
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 $species && $self->species($species);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 $ann && $self->annotation($ann);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 if( defined $feat ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 if( ref($feat) !~ /ARRAY/i ) { # not an array reference (case-insensitive match on the ref() string): expect one feature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522 if( ref($feat) && $feat->isa('Bio::SeqFeatureI') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 $self->add_SeqFeature($feat);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 $self->warn("Must specify a valid Bio::SeqFeatureI or ArrayRef of Bio::SeqFeatureI's with the -features init parameter for ".ref($self)); # warns only; construction continues without the feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 foreach my $feature ( @$feat ) { # array reference: add each element in order, without a per-element type check here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 $self->add_SeqFeature($feature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 return $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 =head1 PrimarySeq interface
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 The PrimarySeq interface provides the basic sequence getting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 and setting methods for all sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 These methods implement the Bio::PrimarySeq interface by delegating
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 to the primary_seq inside the object. This means that you
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 can use a Seq object wherever there is a PrimarySeq, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547 of course, you are free to use these functions anyway.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 =head2 seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 Title : seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 Usage : $string = $obj->seq()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 Function: Get/Set the sequence as a string of letters. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 case of the letters is left up to the implementer.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 Suggested cases are upper case for proteins and lower case for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 DNA sequence (IUPAC standard),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 but implementations are suggested to keep an open mind about
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560 case (some users... want mixed case!)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 Args : Optionally on set the new value (a string). An optional second
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 argument presets the alphabet (otherwise it will be guessed).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 Both parameters may also be given in named parameter style
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 with -seq and -alphabet being the names.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569 sub seq { # get/set the sequence string by delegating to the primary_seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 return shift->primary_seq()->seq(@_); # shift removes the invocant, so the remaining arguments are forwarded unchanged
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573 =head2 validate_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 Title : validate_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 Usage : if(! $seq->validate_seq($seq_str) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 print "sequence $seq_str is not valid for an object of type ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578 ref($seq), "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 Function: Validates a given sequence string. A validating sequence string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 must be accepted by seq(). A string that does not validate will
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582 lead to an exception if passed to seq().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 The implementation provided here does not take alphabet() into
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585 account. Allowed are all letters (A-Z) and '-','.', and '*'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588 Returns : 1 if the supplied sequence string is valid for the object, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 0 otherwise.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 Args : The sequence string to be validated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 sub validate_seq { # check a sequence string by delegating to the primary_seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 return shift->primary_seq()->validate_seq(@_); # result comes straight from the primary_seq object's validate_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 =head2 length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601 Title : length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 Usage : $len = $seq->length()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603 Function:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605 Returns : Integer representing the length of the sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610 sub length { # sequence length, as reported by the primary_seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611 return shift->primary_seq()->length(@_); # any extra arguments are passed through as-is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614 =head1 Methods from the Bio::PrimarySeqI interface
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 =head2 subseq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 Title : subseq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621 Usage : $substring = $obj->subseq(10,40);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622 Function: Returns the subseq from start to end, where the first base
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 is 1 and the number is inclusive, ie 1-2 are the first two
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 bases of the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 Start cannot be larger than end but can be equal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 Returns : A string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629 Args : 2 integers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634 sub subseq { # sub-sequence lookup, delegated to the primary_seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 return shift->primary_seq()->subseq(@_); # no argument checking is done here; arguments are forwarded unchanged
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 =head2 display_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640 Title : display_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 Usage : $id = $obj->display_id or $obj->display_id($newid);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642 Function: Gets or sets the display id, also known as the common name of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 the Seq object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 The semantics of this is that it is the most likely string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646 to be used as an identifier of the sequence, and likely to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 have "human" readability. The id is equivalent to the LOCUS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648 field of the GenBank/EMBL databanks and the ID field of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649 Swissprot/sptrembl database. In fasta format, the >(\S+) is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650 presumed to be the id, though some people overload the id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651 to embed other information. Bioperl does not use any
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 embedded information in the ID field, and people are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653 encouraged to use other mechanisms (accession field for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654 example, or extending the sequence object) to solve this.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656 Notice that $seq->id() maps to this function, mainly for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657 legacy/convenience issues.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658 Returns : A string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 Args : None or a new id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664 sub display_id { # get/set the display id held by the primary_seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665 return shift->primary_seq->display_id(@_); # with an argument this acts as a setter on the primary_seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670 =head2 accession_number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672 Title : accession_number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673 Usage : $unique_biological_key = $obj->accession_number;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674 Function: Returns the unique biological id for a sequence, commonly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675 called the accession_number. For sequences from established
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676 databases, the implementors should try to use the correct
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677 accession number. Notice that primary_id() provides the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 unique id for the implementation, allowing multiple objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 to have the same accession number in a particular implementation.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681 For sequences with no accession number, this method should return
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 "unknown".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 Can also be used to set the accession number.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685 Example : $key = $seq->accession_number or $seq->accession_number($key)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686 Returns : A string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687 Args : None or an accession number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692 sub accession_number { # get/set the accession number held by the primary_seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693 return shift->primary_seq->accession_number(@_); # the value is stored on the primary_seq object, not on this object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696 =head2 desc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698 Title : desc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 Usage : $seqobj->desc($string) or $seqobj->desc()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700 Function: Sets or gets the description of the sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702 Returns : The description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 Args : The description or none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708 sub desc { # get/set the description held by the primary_seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709 return shift->primary_seq->desc(@_); # arguments, if any, are forwarded unchanged
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712 =head2 primary_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714 Title : primary_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715 Usage : $unique_implementation_key = $obj->primary_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716 Function: Returns the unique id for this object in this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717 implementation. This allows implementations to manage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718 their own object ids in a way the implementation can control
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 and clients can expect one id to map to one object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 For sequences with no natural id, this method should return
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722 a stringified memory location.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724 Can also be used to set the primary_id.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 Also notice that this method is not delegated to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 internal Bio::PrimarySeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729 [Note this method name is likely to change in 1.3]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731 Example : $id = $seq->primary_id or $seq->primary_id($id)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732 Returns : A string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733 Args : None or an id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Get or set the primary id, which is stored on this object itself
# (under the 'primary_id' key) rather than on the primary sequence.
#
# Args    : optional new id; an undefined argument is ignored
# Returns : the stored id, or the stringified object when no
#           'primary_id' key has ever been set
sub primary_id {
    my ($self, $new_id) = @_;

    $self->{'primary_id'} = $new_id if defined $new_id;

    # Fall back to the stringified reference only when the key is
    # absent; a key that exists with an undef value is returned as-is.
    return exists $self->{'primary_id'}
        ? $self->{'primary_id'}
        : "$self";
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750 =head2 can_call_new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752 Title : can_call_new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753 Usage : if ( $obj->can_call_new ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754 $newobj = $obj->new( %param );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756 Function: can_call_new returns 1 or 0 depending
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757 on whether an implementation allows new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 constructor to be called. If a new constructor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759 is allowed, then it should take the following hashed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 constructor list.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 $myobject->new( -seq => $sequence_as_string,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763 -display_id => $id,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 -accession_number => $accession,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765 -alphabet => 'dna',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768 Returns : 1 or 0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Report whether new() may be called through an instance.
# This implementation answers 1 unconditionally and ignores its arguments.
sub can_call_new {
    my $new_is_supported = 1;
    return $new_is_supported;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778 =head2 alphabet
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780 Title : alphabet
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781 Usage : if ( $obj->alphabet eq 'dna' ) { /Do Something/ }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782 Function: Returns the type of sequence being one of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 'dna', 'rna' or 'protein'. This is case sensitive.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785 This is not called <type> because this would cause
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786 upgrade problems from the 0.5 and earlier Seq objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788 Returns : A string either 'dna','rna','protein'. NB - the object must
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 make a call of the type - if there is no type specified it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790 has to guess.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Get or set the alphabet via the primary sequence object.
#
# The arguments are forwarded only when a defined first argument was
# supplied; otherwise alphabet() is called on the primary sequence
# with no arguments at all, so an explicit undef acts as a plain get.
sub alphabet {
    my $self = shift;
    my $pseq = $self->primary_seq;

    if ( @_ && defined $_[0] ) {
        return $pseq->alphabet(@_);
    }
    else {
        return $pseq->alphabet();
    }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Get or set the circularity flag via the primary sequence object.
# Arguments are forwarded, matching the other delegating accessors in
# this file (desc, version, authority, namespace), so a value passed
# here reaches is_circular() on the primary sequence instead of being
# discarded.
sub is_circular {
    my $self = shift;
    return $self->primary_seq->is_circular(@_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 =head1 Methods for Bio::IdentifiableI compliance
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808 =head2 object_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810 Title : object_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811 Usage : $string = $obj->object_id()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812 Function: a string which represents the stable primary identifier
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813 in this namespace of this object. For DNA sequences this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814 is its accession_number, similarly for protein sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816 This is aliased to accession_number().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
817 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Alias for accession_number(): all arguments are passed through to
# that method on this same object and its result is returned.
sub object_id {
    my $self = shift;
    return $self->accession_number(@_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826 =head2 version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828 Title : version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829 Usage : $version = $obj->version()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830 Function: a number which differentiates between versions of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831 the same object. Higher numbers are considered to be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
832 later and more relevant, but a single object described
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
833 by the same identifier should represent the same concept
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
834
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
835 Returns : A number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
836
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Get or set the version by delegating, arguments included, to the
# version() method of the primary sequence object.
sub version {
    my $self = shift;
    my $pseq = $self->primary_seq;
    return $pseq->version(@_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
843
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
844 =head2 authority
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
845
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
846 Title : authority
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
847 Usage : $authority = $obj->authority()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
848 Function: a string which represents the organisation which
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
849 granted the namespace, written as the DNS name for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
850 organisation (eg, wormbase.org)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
851
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
852 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
853
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Get or set the authority by delegating, arguments included, to the
# authority() method of the primary sequence object.
sub authority {
    my $self = shift;
    my $pseq = $self->primary_seq();
    return $pseq->authority(@_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860 =head2 namespace
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862 Title : namespace
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863 Usage : $string = $obj->namespace()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864 Function: A string representing the name space this identifier
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865 is valid in, often the database name or the name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
866 describing the collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
867
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
868 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
869
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
870
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
871 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
872
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Get or set the namespace by delegating, arguments included, to the
# namespace() method of the primary sequence object.
sub namespace {
    my $self = shift;
    my $pseq = $self->primary_seq();
    return $pseq->namespace(@_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
876
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
877 =head1 Methods for Bio::DescribableI compliance
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
878
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
879 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
880
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
881 =head2 display_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
882
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
883 Title : display_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
884 Usage : $string = $obj->display_name()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
885 Function: A string which is what should be displayed to the user
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
886 the string should have no spaces (ideally, though a cautious
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
887 user of this interface would not assume this) and should be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
888 less than thirty characters (though again, double checking
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
889 this is a good idea)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
890
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
891 This is aliased to display_id().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
892 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
893
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
894 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
895
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Alias for display_id(): all arguments are passed through to that
# method on this same object and its result is returned.
sub display_name {
    my $self = shift;
    return $self->display_id(@_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
899
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
900 =head2 description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
901
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
902 Title : description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
903 Usage : $string = $obj->description()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
904 Function: A text string suitable for displaying to the user a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
905 description. This string is likely to have spaces, but
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
906 should not have any newlines or formatting - just plain
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
907 text. The string should not be greater than 255 characters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
908 and clients can feel justified at truncating strings at 255
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
909 characters for the purposes of display
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
910
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
911 This is aliased to desc().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
912 Returns : A scalar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
913
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
914 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
915
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Alias for desc(): all arguments are passed through to that method
# on this same object and its result is returned.
sub description {
    my $self = shift;
    return $self->desc(@_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
919
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
920 =head1 Methods for implementing Bio::AnnotatableI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
921
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
922 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
923
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
924 =head2 annotation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
925
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
926 Title : annotation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
927 Usage : $ann = $seq->annotation or $seq->annotation($annotation)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
928 Function: Gets or sets the annotation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
929 Returns : L<Bio::AnnotationCollectionI> object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
930 Args : None or L<Bio::AnnotationCollectionI> object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
931
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
932 See L<Bio::AnnotationCollectionI> and L<Bio::Annotation::Collection>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
933 for more information
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
934
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
935 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
936
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Get or set the annotation collection held under the '_annotation' key.
#
# Args    : optional object implementing Bio::AnnotationCollectionI
# Returns : the stored collection; when none is stored and no value is
#           given, a new Bio::Annotation::Collection is created,
#           stored and returned
# Throws  : via $obj->throw() when a defined value is not a blessed
#           object that isa Bio::AnnotationCollectionI
sub annotation {
    my ($obj, $value) = @_;

    if ( defined $value ) {
        # Check blessed() first: calling ->isa on an unblessed
        # reference would die with a raw Perl error and never reach
        # throw().
        require Scalar::Util;
        $obj->throw("object of class ".ref($value)." does not implement ".
                    "Bio::AnnotationCollectionI. Too bad.")
            unless Scalar::Util::blessed($value)
                && $value->isa("Bio::AnnotationCollectionI");
        $obj->{'_annotation'} = $value;
    }
    elsif ( !defined $obj->{'_annotation'} ) {
        # Direct method call instead of the ambiguous indirect-object
        # form "new Class".
        $obj->{'_annotation'} = Bio::Annotation::Collection->new();
    }
    return $obj->{'_annotation'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
949
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
950 =head1 Methods to implement Bio::FeatureHolderI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
951
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
952 This includes methods for retrieving, adding, and removing features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
953
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
954 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
955
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
956 =head2 get_SeqFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
957
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
958 Title : get_SeqFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
959 Usage :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
960 Function: Get the feature objects held by this feature holder.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
961
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
962 Features which are not top-level are subfeatures of one or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
963 more of the returned feature objects, which means that you
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
964 must traverse the subfeature arrays of each top-level
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
965 feature object in order to traverse all features associated
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
966 with this sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
967
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
968 Use get_all_SeqFeatures() if you want the feature tree
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
969 flattened into one single array.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
970
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
971 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
972 Returns : an array of Bio::SeqFeatureI implementing objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
973 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
974
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
975 Some day we may want to expand this method to allow for a feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
976 filter to be passed in.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
977
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
978 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
979
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Return the features stored under the '_as_feat' key as a list.
# As a side effect, an undefined '_as_feat' slot is initialised to an
# empty array reference before the list is returned.
sub get_SeqFeatures {
    my ($self) = @_;

    $self->{'_as_feat'} = [] unless defined $self->{'_as_feat'};

    return @{ $self->{'_as_feat'} };
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
989
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
990 =head2 get_all_SeqFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
991
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
992 Title : get_all_SeqFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
993 Usage : @feat_ary = $seq->get_all_SeqFeatures();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
994 Function: Returns the tree of feature objects attached to this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
995 sequence object flattened into one single array. Top-level
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
996 features will still contain their subfeature-arrays, which
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
997 means that you will encounter subfeatures twice if you
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
998 traverse the subfeature tree of the returned objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
999
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1000 Use get_SeqFeatures() if you want the array to contain only
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1001 the top-level features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1002
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1003 Returns : An array of Bio::SeqFeatureI implementing objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1004 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1005
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1006
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1007 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1008
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1009 # this implementation is inherited from FeatureHolderI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1010
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1011 =head2 feature_count
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1012
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1013 Title : feature_count
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1014 Usage : $seq->feature_count()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1015 Function: Return the number of SeqFeatures attached to a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1016 Returns : integer representing the number of SeqFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1017 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1018
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1019
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1020 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1021
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Count the features stored under the '_as_feat' key.
# Returns 0 when that slot is undefined, otherwise the number of
# elements in the array it references.
sub feature_count {
    my $self = shift;

    return 0 unless defined $self->{'_as_feat'};
    return scalar @{ $self->{'_as_feat'} };
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1031
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1032 =head2 add_SeqFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1033
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1034 Title : add_SeqFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1035 Usage : $seq->add_SeqFeature($feat);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1036 $seq->add_SeqFeature(@feat);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1037 Function: Adds the given feature object (or each of an array of feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1038 objects) to the feature array of this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1039 sequence. The object passed is required to implement the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1040 Bio::SeqFeatureI interface.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1041 Returns : 1 on success
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1042 Args : A Bio::SeqFeatureI implementing object, or an array of such objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1043
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1044
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1045 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1046
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Append one or more features to the list kept under '_as_feat'.
#
# Args    : list of objects implementing Bio::SeqFeatureI
# Returns : 1
# Throws  : via $self->throw() on the first argument that is not a
#           blessed object that isa Bio::SeqFeatureI; features that
#           precede it in the argument list have already been added
sub add_SeqFeature {
    my ($self, @feat) = @_;

    $self->{'_as_feat'} = [] unless $self->{'_as_feat'};

    foreach my $feat ( @feat ) {
        # Check blessed() first: calling ->isa on undef or on an
        # unblessed reference would die with a raw Perl error and
        # never produce the message below.
        require Scalar::Util;
        unless ( Scalar::Util::blessed($feat) && $feat->isa("Bio::SeqFeatureI") ) {
            my $shown = defined $feat ? $feat : 'undef';
            $self->throw("$shown is not a SeqFeatureI and that's what we expect...");
        }

        # make sure we attach ourselves to the feature if the feature wants it
        my $aseq = $self->primary_seq;
        $feat->attach_seq($aseq) if $aseq;

        push @{ $self->{'_as_feat'} }, $feat;
    }
    return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1065
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1066 =head2 remove_SeqFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1067
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1068 Title : remove_SeqFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1069 Usage : $seq->remove_SeqFeatures();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1070 Function: Flushes all attached SeqFeatureI objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1071
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1072 To remove individual feature objects, delete those from the returned
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1073 array and re-add the rest.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1074 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1075 Returns : The array of Bio::SeqFeatureI objects removed from this seq.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1076 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1077
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1078
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1079 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1080
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Detach every feature from this object and hand them back.
# Returns an empty list when the '_as_feat' slot is false; otherwise
# copies the stored features, resets the slot to an empty array
# reference, and returns the copy.
sub remove_SeqFeatures {
    my ($self) = @_;

    my $store = $self->{'_as_feat'};
    return () unless $store;

    my @removed = @{$store};
    $self->{'_as_feat'} = [];
    return @removed;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1089
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1090 =head1 Methods provided in the Bio::PrimarySeqI interface
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1091
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1092
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1093 These methods are inherited from the PrimarySeq interface
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1094 and work as one expects, building new Bio::Seq objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1095 or other information as expected. See L<Bio::PrimarySeq>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1096 for more information.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1097
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1098 Sequence Features are B<not> transferred to the new objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1099 This is possibly a mistake. Anyone who feels the urge in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1100 dealing with this is welcome to give it a go.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1101
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1102 =head2 revcom
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1104 Title : revcom
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1105 Usage : $rev = $seq->revcom()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1106 Function: Produces a new Bio::Seq object which
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1107 is the reversed complement of the sequence. For protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1108 sequences this throws an exception of "Sequence is a protein.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1109 Cannot revcom"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1111 The id is the same id as the original sequence, and the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1112 accession number is also identical. If someone wants to track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1113 that this sequence has been reversed, it needs to define its own
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1114 extensions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1116 To do an in-place edit of an object you can go:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1118 $seq = $seq->revcom();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1120 This of course, causes Perl to handle the garbage collection of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1121 the old object, but it is roughly speaking as efficient as an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1122 in-place edit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1123
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1124 Returns : A new (fresh) Bio::Seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1125 Args : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1128 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1130 =head2 trunc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1132 Title : trunc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1133 Usage : $subseq = $myseq->trunc(10,100);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1134 Function: Provides a truncation of a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1136 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1137 Returns : A fresh Seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1138 Args : Two integers: the start and end positions, as in trunc(10,100)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1141 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1143 =head2 id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1145 Title : id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1146 Usage : $id = $seq->id()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1147 Function: This is mapped on display_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1148 Returns : value of display_id()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1149 Args : [optional] value to update display_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1152 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1153
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub id {
    # Thin alias: hand the invocant and any arguments straight to
    # display_id() and give back whatever it returns.
    my ($self, @args) = @_;
    return $self->display_id(@args);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1159 =head1 Seq only methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1160
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1162 These methods are specific to the Bio::Seq object, and not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1163 found on the Bio::PrimarySeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1165 =head2 primary_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1167 Title : primary_seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1168 Usage : $seq->primary_seq or $seq->primary_seq($newval)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1169 Function: Get or set a PrimarySeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1170 Example :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1171 Returns : PrimarySeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1172 Args : None or PrimarySeq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1174
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1175 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1176
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub primary_seq {
    # Get or set the underlying primary sequence object.
    #
    # Args    : optional object implementing Bio::PrimarySeqI
    # Returns : the value currently stored under 'primary_seq'
    # Throws  : via $obj->throw() if a defined argument is not a blessed
    #           object that isa Bio::PrimarySeqI
    my ($obj, $value) = @_;

    if ( defined $value ) {
        # Loaded on demand so this sub does not depend on the file's
        # top-of-file imports.
        require Scalar::Util;

        # blessed() must guard the isa() call: a plain "ref $value" test
        # lets unblessed references (e.g. a bare hashref) through, and
        # calling ->isa on those dies inside Perl before throw() can
        # report the problem.
        if (   !Scalar::Util::blessed($value)
            || !$value->isa('Bio::PrimarySeqI') )
        {
            $obj->throw("$value is not a Bio::PrimarySeq compliant object");
        }

        $obj->{'primary_seq'} = $value;

        # Walk the features returned by get_SeqFeatures() and offer each
        # one the new sequence to attach.
        foreach my $sf ( $obj->get_SeqFeatures() ) {
            $sf->attach_seq($value);
        }
    }

    return $obj->{'primary_seq'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1198 =head2 species
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1200 Title : species
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1201 Usage : $species = $seq->species() or $seq->species($species)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1202 Function: Gets or sets the species
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1203 Returns : L<Bio::Species> object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1204 Args : None or L<Bio::Species> object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1205
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1206 See L<Bio::Species> for more information
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1207
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1208 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1209
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub species {
    # Get or set the species entry of this object.
    #
    # Args    : optional new species value
    # Returns : the value stored under 'species' after any update
    my ($self, $species) = @_;

    # Only a true argument is stored; no argument, undef, '' or 0 all
    # leave the current value in place, exactly as before.
    $self->{'species'} = $species if $species;

    # Explicit return on both the get and the set path, instead of
    # relying on the implicit value of the last statement in an if block.
    return $self->{'species'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1218
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1219 =head1 Internal methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1221 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1222
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Deliberately empty destructor. The original note says it exists to
# "keep AUTOLOAD happy" -- presumably so that DESTROY is never dispatched
# to an AUTOLOAD handler; confirm against the class hierarchy.
sub DESTROY {
    return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1226 ############################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1227 # aliases due to name changes or to compensate for our lack of consistency #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1228 ############################################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1229
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1230 # in all other modules we use the object in the singular --
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1231 # lack of consistency sucks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1232 *flush_SeqFeature = \&remove_SeqFeatures;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1233 *flush_SeqFeatures = \&remove_SeqFeatures;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1234
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1235 # this is now get_SeqFeatures() (from FeatureHolderI)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1236 *top_SeqFeatures = \&get_SeqFeatures;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1238 # this is now get_all_SeqFeatures() in FeatureHolderI
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub all_SeqFeatures {
    # Legacy name: forwards the call, arguments included, to
    # get_all_SeqFeatures() and returns its result unchanged.
    my ($self, @args) = @_;
    return $self->get_all_SeqFeatures(@args);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1242
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub accession {
    # Deprecated spelling of accession_number(): emits a deprecation
    # warning through the object's warn() method, then delegates with
    # the remaining arguments.
    my ($self, @args) = @_;

    my $notice = sprintf
        '%s::accession is deprecated, use accession_number() instead',
        ref $self;
    $self->warn($notice);

    return $self->accession_number(@args);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1249
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1250 1;