annotate variant_effect_predictor/Bio/DB/GFF.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: GFF.pm,v 1.71.2.2 2003/09/12 13:29:32 lstein Exp $
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5 Bio::DB::GFF -- Storage and retrieval of sequence annotation data
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9 use Bio::DB::GFF;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11 # Open the sequence database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12 my $db = Bio::DB::GFF->new( -adaptor => 'dbi::mysqlopt',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13 -dsn => 'dbi:mysql:elegans',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14 -fasta => '/usr/local/fasta_files'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17 # fetch a 1 megabase segment of sequence starting at landmark "ZK909"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18 my $segment = $db->segment('ZK909', 1 => 1000000);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20 # pull out all transcript features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21 my @transcripts = $segment->features('transcript');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23 # for each transcript, total the length of the introns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24 my %totals;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25 for my $t (@transcripts) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26 my @introns = $t->Intron;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27 $totals{$t->name} += $_->length foreach @introns;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30 # Sort the exons of the first transcript by position
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31 my @exons = sort {$a->start <=> $b->start} $transcripts[0]->Exon;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33 # Get a region 1000 bp upstream of first exon
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34 my $upstream = $exons[0]->segment(-1000,0);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36 # get its DNA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37 my $dna = $upstream->dna;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39 # and get all curated polymorphisms inside it
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40 my @polymorphisms = $upstream->contained_features('polymorphism:curated');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42 # get all feature types in the database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43 my @types = $db->types;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45 # count all feature types in the segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46 my %type_counts = $segment->types(-enumerate=>1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48 # get an iterator on all curated features of type 'exon' or 'intron'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49 my $iterator = $db->get_seq_stream(-type => ['exon:curated','intron:curated']);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51 while (my $s = $iterator->next_seq) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52 print $s,"\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55 # find all transcripts annotated as having function 'kinase'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56 $iterator = $db->get_seq_stream(-type=>'transcript',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57 -attributes=>{Function=>'kinase'});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58 while (my $s = $iterator->next_seq) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59 print $s,"\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
62 =head1 DESCRIPTION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
63
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
64 Bio::DB::GFF provides fast indexed access to a sequence annotation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
65 database. It supports multiple database types (ACeDB, relational),
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
66 and multiple schemas through a system of adaptors and aggregators.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
67
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
68 The following operations are supported by this module:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
69
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
70 - retrieving a segment of sequence based on the ID of a landmark
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
71 - retrieving the DNA from that segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
72 - finding all annotations that overlap with the segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
73 - finding all annotations that are completely contained within the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
74 segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
75 - retrieving all annotations of a particular type, either within a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
76 segment, or globally
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
77 - conversion from absolute to relative coordinates and back again,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
78 using any arbitrary landmark for the relative coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
79 - using a sequence segment to create new segments based on relative
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
80 offsets
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
81
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
82 The data model used by Bio::DB::GFF is compatible with the GFF flat
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
83 file format (http://www.sanger.ac.uk/software/GFF). The module can
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
84 load a set of GFF files into the database, and serves objects that
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
85 have methods corresponding to GFF fields.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
86
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
87 The objects returned by Bio::DB::GFF are compatible with the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
88 SeqFeatureI interface, allowing their use by the Bio::Graphics and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
89 Bio::DAS modules.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
90
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
91 =head2 Auxiliary Scripts
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
92
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
93 The bioperl distribution includes several scripts that make it easier
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
94 to work with Bio::DB::GFF databases. They are located in the scripts
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
95 directory under a subdirectory named Bio::DB::GFF:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
96
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
97 =over 4
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
98
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
99 =item bp_load_gff.pl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
100
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
101 This script will load a Bio::DB::GFF database from a flat GFF file of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102 sequence annotations. Only the relational database version of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
103 Bio::DB::GFF is supported. It can be used to create the database from
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
104 scratch, as well as to incrementally load new data.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
105
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
106 This script takes a --fasta argument to load raw DNA into the database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
107 as well. However, GFF databases do not require access to the raw DNA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
108 for most of their functionality.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
109
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
110 bp_load_gff.pl also has a --upgrade option, which will perform a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
111 non-destructive upgrade of older schemas to newer ones.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
112
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
113 =item bp_bulk_load_gff.pl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
114
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
115 This script will populate a Bio::DB::GFF database from a flat GFF file
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
116 of sequence annotations. Only the MySQL database version of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
117 Bio::DB::GFF is supported. It uses the "LOAD DATA INFILE" query in
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
118 order to accelerate loading considerably; however, it can only be used
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
119 for the initial load, and not for updates.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
120
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
121 This script takes a --fasta argument to load raw DNA into the database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
122 as well. However, GFF databases do not require access to the raw DNA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
123 for most of their functionality.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
124
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
125 =item bp_fast_load_gff.pl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
126
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
127 This script is as fast as bp_bulk_load_gff.pl but uses Unix pipe
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
128 tricks to allow for incremental updates. It only supports the MySQL
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
129 database version of Bio::DB::GFF and is guaranteed not to work on
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
130 non-Unix platforms.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
131
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
132 Arguments are the same as bp_load_gff.pl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
133
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
134 =item gadfly_to_gff.pl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
135
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
136 This script will convert the GFF-like format used by the Berkeley
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
137 Drosophila Sequencing project into a format suitable for use with this
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
138 module.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
139
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
140 =item sgd_to_gff.pl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
141
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
142 This script will convert the tab-delimited feature files used by the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
143 Saccharomyces Genome Database into a format suitable for use with this
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
144 module.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
145
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
146 =back
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
147
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
148 =head2 GFF Fundamentals
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
149
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
150 The GFF format is a flat tab-delimited file, each line of which
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
151 corresponds to an annotation, or feature. Each line has nine columns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
152 and looks like this:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
153
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
154 Chr1 curated CDS 365647 365963 . + 1 Transcript "R119.7"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
155
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
156 The 9 columns are as follows:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
157
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
158 =over 4
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
159
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
160 =item 1. reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
161
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
162 This is the ID of the sequence that is used to establish the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
163 coordinate system of the annotation. In the example above, the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
164 reference sequence is "Chr1".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
165
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
166 =item 2. source
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
167
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
168 The source of the annotation. This field describes how the annotation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
169 was derived. In the example above, the source is "curated" to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
170 indicate that the feature is the result of human curation. The names
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
171 and versions of software programs are often used for the source field,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
172 as in "tRNAScan-SE/1.2".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
173
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
174 =item 3. method
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
175
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
176 The annotation method. This field describes the type of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
177 annotation, such as "CDS". Together the method and source describe
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
178 the annotation type.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
179
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
180 =item 4. start position
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
181
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
182 The start of the annotation relative to the reference sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
183
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
184 =item 5. stop position
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
185
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
186 The stop of the annotation relative to the reference sequence. Start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
187 is always less than or equal to stop.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
188
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
189 =item 6. score
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
190
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
191 For annotations that are associated with a numeric score (for example,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
192 a sequence similarity), this field describes the score. The score
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
193 units are completely unspecified, but for sequence similarities, it is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
194 typically percent identity. Annotations that don't have a score can
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
195 use "."
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
196
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
197 =item 7. strand
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
198
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
199 For those annotations which are strand-specific, this field is the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
200 strand on which the annotation resides. It is "+" for the forward
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
201 strand, "-" for the reverse strand, or "." for annotations that are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
202 not stranded.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
203
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
204 =item 8. phase
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
205
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
206 For annotations that are linked to proteins, this field describes the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
207 phase of the annotation on the codons. It is a number from 0 to 2, or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
208 "." for features that have no phase.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
209
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
210 =item 9. group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
211
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
212 GFF provides a simple way of generating annotation hierarchies ("is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
213 composed of" relationships) by providing a group field. The group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
214 field contains the class and ID of an annotation which is the logical
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
215 parent of the current one. In the example given above, the group is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
216 the Transcript named "R119.7".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
217
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
218 The group field is also used to store information about the target of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
219 sequence similarity hits, and miscellaneous notes. See the next
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
220 section for a description of how to describe similarity targets.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
221
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
222 The format of the group fields is "Class ID" with a single space (not
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
223 a tab) separating the class from the ID. It is VERY IMPORTANT to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
224 follow this format, or grouping will not work properly.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
225
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
226 =back
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
227
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
228 The sequences used to establish the coordinate system for annotations
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
229 can correspond to sequenced clones, clone fragments, contigs or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
230 super-contigs. Thus, this module can be used throughout the lifecycle
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
231 of a sequencing project.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
232
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
233 In addition to a group ID, the GFF format allows annotations to have a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
234 group class. For example, in the ACeDB representation, RNA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
235 interference experiments have a class of "RNAi" and an ID that is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
236 unique among the RNAi experiments. Since not all databases support
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
237 this notion, the class is optional in all calls to this module, and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
238 defaults to "Sequence" when not provided.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
239
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
240 Double-quotes are sometimes used in GFF files around components of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
241 group field. Strictly, this is only necessary if the group name or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
242 class contains whitespace.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
243
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
244 =head2 Making GFF files work with this module
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
245
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
246 Some annotations do not need to be individually named. For example,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
247 it is probably not useful to assign a unique name to each ALU repeat
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
248 in a vertebrate genome. Others, such as predicted genes, correspond
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
249 to named biological objects; you probably want to be able to fetch the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
250 positions of these objects by referring to them by name.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
251
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
252 To accommodate named annotations, the GFF format places the object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
253 class and name in the group field. The name identifies the object,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
254 and the class prevents similarly-named objects, for example clones and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
255 sequences, from colliding.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
256
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
257 A named object is shown in the following excerpt from a GFF file:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
258
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
259 Chr1 curated transcript 939627 942410 . + . Transcript Y95B8A.2
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
260
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
261 This object is a predicted transcript named Y95B8A.2. In this case,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
262 the group field is used to identify the class and name of the object,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
263 even though no other annotation belongs to that group.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
264
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
265 It now becomes possible to retrieve the region of the genome covered
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
266 by transcript Y95B8A.2 using the segment() method:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
267
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
268 $segment = $db->segment(-class=>'Transcript',-name=>'Y95B8A.2');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
269
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
270 It is not necessary for the annotation's method to correspond to the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
271 object class, although this is commonly the case.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
272
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
273 As explained above, each annotation in a GFF file refers to a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
274 reference sequence. It is important that each reference sequence also
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
275 be identified by a line in the GFF file. This allows the Bio::DB::GFF
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
276 module to determine the length and class of the reference sequence,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
277 and makes it possible to do relative arithmetic.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
278
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
279 For example, if "Chr1" is used as a reference sequence, then it should
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
280 have an entry in the GFF file similar to this one:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
281
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
282 Chr1 assembly chromosome 1 14972282 . + . Sequence Chr1
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
283
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
284 This indicates that the reference sequence named "Chr1" has length
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
285 14972282 bp, method "chromosome" and source "assembly". In addition,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
286 as indicated by the group field, Chr1 has class "Sequence" and name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
287 "Chr1".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
288
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
289 The object class "Sequence" is used by default when the class is not
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
290 specified in the segment() call. This allows you to use a shortcut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
291 form of the segment() method:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
292
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
293 $segment = $db->segment('Chr1'); # whole chromosome
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
294 $segment = $db->segment('Chr1',1=>1000); # first 1000 bp
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
295
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
296 For your convenience, if, during loading a GFF file, Bio::DB::GFF
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
297 encounters a line like the following:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
298
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
299 ##sequence-region Chr1 1 14972282
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
300
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
301 It will automatically generate the following entry:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
302
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
303 Chr1 reference Component 1 14972282 . + . Sequence Chr1
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
304
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
305 This is sufficient to use Chr1 as a reference point.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
306 The ##sequence-region line is frequently found in the GFF files
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
307 distributed by annotation groups.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
308
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
309 =head2 Sequence alignments
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
310
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
311 There are two cases in which an annotation indicates the relationship
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
312 between two sequences. The first case is a similarity hit, where the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
313 annotation indicates an alignment. The second case is a map assembly,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
314 in which the annotation indicates that a portion of a larger sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
315 is built up from one or more smaller ones.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
316
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
317 Both cases are indicated by using the B<Target> tag in the group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
318 field. For example, a typical similarity hit will look like this:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
319
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
320 Chr1 BLASTX similarity 76953 77108 132 + 0 Target Protein:SW:ABL_DROME 493 544
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
321
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
322 The group field contains the Target tag, followed by an identifier for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
323 the biological object referred to. The GFF format uses the notation
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
324 I<Class>:I<Name> for the biological object, and even though this is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
325 stylistically inconsistent, that's the way it's done. The object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
326 identifier is followed by two integers indicating the start and stop
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
327 of the alignment on the target sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
328
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
329 Unlike the main start and stop columns, it is possible for the target
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
330 start to be greater than the target end. The previous example
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
331 indicates that the section of Chr1 from 76,953 to 77,108 aligns to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
332 the protein SW:ABL_DROME starting at position 493 and extending to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
333 position 544.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
334
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
335 A similar notation is used for sequence assembly information as shown
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
336 in this example:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
337
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
338 Chr1 assembly Link 10922906 11177731 . . . Target Sequence:LINK_H06O01 1 254826
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
339 LINK_H06O01 assembly Cosmid 32386 64122 . . . Target Sequence:F49B2 6 31742
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
340
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
341 This indicates that the region between bases 10922906 and 11177731 of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
342 Chr1 is composed of LINK_H06O01 from bp 1 to bp 254826. The region
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
343 of LINK_H06O01 between 32386 and 64122 is, in turn, composed of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
344 bases 6 to 31742 of cosmid F49B2.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
345
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
346 =head2 Attributes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
347
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
348 While not intended to serve as a general-purpose sequence database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
349 (see bioperl-db for that), GFF allows you to tag features with
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
350 arbitrary attributes. Attributes appear in the Group field following
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
351 the initial class/name pair. For example:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
352
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
353 Chr1 cur trans 939 942 . + . Transcript Y95B8A.2 ; Gene sma-3 ; Alias sma3
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
354
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
355 This line tags the feature named Transcript Y95B8A.2 as being "Gene"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
356 named sma-3 and having the Alias "sma3". Features having these
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
357 attributes can be looked up using the fetch_feature_by_attribute() method.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
358
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
359 Two attributes have special meaning: "Note" is for backward
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
360 compatibility and is used for unstructured text remarks. "Alias" is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
361 considered as a synonym for the feature name and will be consulted
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
362 when looking up a feature by its name.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
363
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
364 =head2 Adaptors and Aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
365
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
366 This module uses a system of adaptors and aggregators in order to make
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
367 it adaptable to use with a variety of databases.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
368
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
369 =over 4
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
370
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
371 =item Adaptors
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
372
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
373 The core of the module handles the user API, annotation coordinate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
374 arithmetic, and other common issues. The details of fetching
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
375 information from databases are handled by an adaptor, which is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
376 specified during Bio::DB::GFF construction. The adaptor encapsulates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
377 database-specific information such as the schema, user authentication
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
378 and access methods.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
379
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
380 Currently there are two adaptors: 'dbi::mysql' and 'dbi::mysqlopt'.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
381 The former is an interface to a simple Mysql schema. The latter is an
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
382 optimized version of dbi::mysql which uses a binning scheme to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
383 accelerate range queries and the Bio::DB::Fasta module for rapid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
384 retrieval of sequences. Note the double-colon between the words.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
385
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
386 =item Aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
387
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
388 The GFF format uses a "group" field to indicate aggregation properties
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
389 of individual features. For example, a set of exons and introns may
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
390 share a common transcript group, and multiple transcripts may share
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
391 the same gene group.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
392
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
393 Aggregators are small modules that use the group information to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
394 rebuild the hierarchy. When a Bio::DB::GFF object is created, you
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
395 indicate that it use a set of one or more aggregators. Each
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
396 aggregator provides a new composite annotation type. Before the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
397 database query is generated each aggregator is called to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
398 "disaggregate" its annotation type into a list of component types
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
399 contained in the database. After the query is generated, each
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
400 aggregator is called again in order to build composite annotations
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
401 from the returned components.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
402
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
403 For example, during disaggregation, the standard
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
404 "processed_transcript" aggregator generates a list of component
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
405 feature types including "UTR", "CDS", and "polyA_site". Later, it
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
406 aggregates these features into a set of annotations of type
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
407 "processed_transcript".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
408
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
409 During aggregation, the list of aggregators is called in reverse
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
410 order. This allows aggregators to collaborate to create multi-level
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
411 structures: the transcript aggregator assembles transcripts from
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
412 introns and exons; the gene aggregator then assembles genes from sets
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
413 of transcripts.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
414
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
415 Three default aggregators are provided:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
416
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
417 transcript assembles transcripts from features of type
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
418 exon, CDS, 5'UTR, 3'UTR, TSS, and PolyA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
419 clone assembles clones from Clone_left_end, Clone_right_end
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
420 and Sequence features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
421 alignment assembles gapped alignments from features of type
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
422 "similarity".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
423
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
424 In addition, this module provides the optional "wormbase_gene"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
425 aggregator, which accommodates the WormBase representation of genes.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
426 This aggregator aggregates features of method "exon", "CDS", "5'UTR",
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
427 "3'UTR", "polyA" and "TSS" into a single object. It also expects to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
428 find a single feature of type "Sequence" that spans the entire gene.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
429
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
430 The existing aggregators are easily customized.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
431
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
432 Note that aggregation will not occur unless you specifically request
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
433 the aggregation type. For example, this call:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
434
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
435 @features = $segment->features('alignment');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
436
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
437 will generate an array of aggregated alignment features. However,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
438 this call:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
439
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
440 @features = $segment->features();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
441
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
442 will return a list of unaggregated similarity segments.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
443
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
444 For more information, see the manual pages for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
445 Bio::DB::GFF::Aggregator::processed_transcript, Bio::DB::GFF::Aggregator::clone,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
446 etc.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
447
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
448 =back
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
449
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
450 =head1 API
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
451
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
452 The following is the API for Bio::DB::GFF.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
453
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
454 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
455
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
456 package Bio::DB::GFF;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
457
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
458 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
459
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
460 use Bio::DB::GFF::Util::Rearrange;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
461 use Bio::DB::GFF::RelSegment;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
462 use Bio::DB::GFF::Feature;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
463 use Bio::DB::GFF::Aggregator;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
464 use Bio::DasI;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
465 use Bio::Root::Root;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
466
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
467 use vars qw(@ISA $VERSION);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
468 @ISA = qw(Bio::Root::Root Bio::DasI);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
469
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
470 $VERSION = '1.2003';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
471 my %valid_range_types = (overlaps => 1,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
472 contains => 1,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
473 contained_in => 1);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
474
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
475 =head1 Querying GFF Databases
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
476
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
477 =head2 new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
478
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
479 Title : new
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
480 Usage : my $db = new Bio::DB::GFF(@args);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
481 Function: create a new Bio::DB::GFF object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
482 Returns : new Bio::DB::GFF object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
483 Args : lists of adaptors and aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
484 Status : Public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
485
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
486 These are the arguments:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
487
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
488 -adaptor Name of the adaptor module to use. If none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
489 provided, defaults to "dbi::mysqlopt".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
490
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
491 -aggregator Array reference to a list of aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
492 to apply to the database. If none provided,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
493 defaults to ['processed_transcript','alignment'].
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
494
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
495 <other> Any other named argument pairs are passed to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
496 the adaptor for processing.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
497
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
498 The adaptor argument must correspond to a module contained within the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
499 Bio::DB::GFF::Adaptor namespace. For example, the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
500 Bio::DB::GFF::Adaptor::dbi::mysql adaptor is loaded by specifying
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
501 'dbi::mysql'. By Perl convention, the adaptor names are lower case
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
502 because they are loaded at run time.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
503
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
504 The aggregator array may contain a list of aggregator names, a list of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
505 initialized aggregator objects, or a string in the form
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
506 "aggregator_name{subpart1,subpart2,subpart3/main_method}" (the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
507 /main_method part is optional). For example, if you wish to change
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
508 the components aggregated by the transcript aggregator, you could pass
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
509 it to the GFF constructor this way:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
510
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
511 my $transcript =
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
512 Bio::DB::GFF::Aggregator::transcript->new(-sub_parts=>[qw(exon intron utr
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
513 polyA spliced_leader)]);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
514
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
515 my $db = Bio::DB::GFF->new(-aggregator=>[$transcript,'clone','alignment'],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
516 -adaptor => 'dbi::mysql',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
517 -dsn => 'dbi:mysql:elegans42');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
518
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
519 Alternatively, you could create an entirely new transcript aggregator
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
520 this way:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
521
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
522 my $new_agg = 'transcript{exon,intron,utr,polyA,spliced_leader}';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
523 my $db = Bio::DB::GFF->new(-aggregator=>[$new_agg,'clone','alignment'],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
524 -adaptor => 'dbi::mysql',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
525 -dsn => 'dbi:mysql:elegans42');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
526
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
527 See L<Bio::DB::GFF::Aggregator> for more details.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
528
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
529 The commonly used 'dbi::mysql' adaptor recognizes the following
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
530 adaptor-specific arguments:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
531
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
532 Argument Description
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
533 -------- -----------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
534
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
535 -dsn the DBI data source, e.g. 'dbi:mysql:ens0040'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
536 If a partial name is given, such as "ens0040", the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
537 "dbi:mysql:" prefix will be added automatically.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
538
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
539 -user username for authentication
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
540
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
541 -pass the password for authentication
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
542
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
543 -refclass landmark Class; defaults to "Sequence"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
544
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
545 The commonly used 'dbi::mysqlopt' adaptor also recognizes the following
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
546 arguments.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
547
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
548 Argument Description
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
549 -------- -----------
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
550
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
551 -fasta path to a directory containing FASTA files for the DNA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
552 contained in this database (e.g. "/usr/local/share/fasta")
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
553
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
554 -acedb an acedb URL to use when converting features into ACEDB
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
555 objects (e.g. sace://localhost:2005)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
556
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
557 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
558
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
559 #'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
560
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
561 sub new { # Constructor: loads the requested adaptor class and returns an instance of that class.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
562 my $package = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
563 my ($adaptor,$aggregators,$args,$refclass);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
564
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
565 if (@_ == 1) { # special case, default to dbi::mysqlopt
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
566 $adaptor = 'dbi::mysqlopt';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
567 $args = {DSN => shift}; # a lone argument is taken to be the DSN
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
568 } else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
569 ($adaptor,$aggregators,$refclass,$args) = rearrange([ # pull out the options handled here; $args receives what is left for the adaptor
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
570 [qw(ADAPTOR FACTORY)],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
571 [qw(AGGREGATOR AGGREGATORS)],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
572 'REFCLASS',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
573 ],@_);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
574 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
575
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
576 $adaptor ||= 'dbi::mysqlopt';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
577 my $class = "Bio::DB::GFF::Adaptor::\L${adaptor}\E"; # \L...\E lower-cases the adaptor name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
578 $class->can('new') # nothing to load when the adaptor class is already available
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
579 or eval "require $class; 1" or $package->throw("Unable to load $adaptor adaptor: $@"); # judge failure by eval's result, so a stale $@ from an earlier eval cannot trigger a false error
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
580
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
581 my $self = $class->new($args); # the returned object is built by the adaptor class, not by $package
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
582 $self->default_class($refclass) if defined $refclass;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
583
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
584 # handle the aggregators.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
585 # aggregators are responsible for creating complex multi-part features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
586 # from the GFF "group" field. If none are provided, then we provide a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
587 # list of the two used in WormBase.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
588 # Each aggregator can be a scalar or a ref. In the former case
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
589 # it is treated as a class name to call new() on. In the latter
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
590 # the aggregator is treated as a ready made object.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
591 $aggregators = $self->default_aggregators unless defined $aggregators;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
592 my @a = ref($aggregators) eq 'ARRAY' ? @$aggregators : $aggregators; # accept an array ref or a single aggregator
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
593 for my $a (@a) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
594 $self->add_aggregator($a);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
595 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
596
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
597 # default settings go here.....
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
598 $self->automerge(1); # set automerge to true
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
599
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
600 $self; # last expression: the adaptor object is the return value
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
601 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
602
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
603
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
604 =head2 types
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
605
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
606 Title : types
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
607 Usage : $db->types(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
608 Function: return list of feature types in range or database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
609 Returns : a list of Bio::DB::GFF::Typename objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
610 Args : see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
611 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
612
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
613 This routine returns a list of feature types known to the database.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
614 The list can be database-wide or restricted to a region. It is also
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
615 possible to find out how many times each feature occurs.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
616
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
617 For range queries, it is usually more convenient to create a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
618 Bio::DB::GFF::Segment object, and then invoke its types() method.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
619
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
620 Arguments are as follows:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
621
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
622 -ref ID of reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
623 -class class of reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
624 -start start of segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
625 -stop stop of segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
626 -enumerate if true, count the features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
627
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
628 The returned value will be a list of Bio::DB::GFF::Typename objects,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
629 which if evaluated in a string context will return the feature type in
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
630 "method:source" format. This object class also has method() and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
631 source() methods for retrieving the like-named fields.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
632
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
633 If -enumerate is true, then the function returns a hash (not a hash
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
634 reference) in which the keys are type names in "method:source" format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
635 and the values are the number of times each feature appears in the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
636 database or segment.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
637
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
638 The argument -end is a synonym for -stop, and -count is a synonym for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
639 -enumerate.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
640
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
641 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
642
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
643 sub types { # Unpack the named arguments and delegate the type lookup to get_types().
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
644 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
645 my ($refseq,$start,$stop,$enumerate,$refclass,$types) = rearrange ([ # bracketed pairs list alternative names for one argument (e.g. -stop / -end)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
646 [qw(REF REFSEQ)],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
647 qw(START),
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
648 [qw(STOP END)],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
649 [qw(ENUMERATE COUNT)],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
650 [qw(CLASS SEQCLASS)],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
651 [qw(TYPE TYPES)],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
652 ],@_);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
653 $types = $self->parse_types($types) if defined $types; # only parse a type filter when one was supplied
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
654 $self->get_types($refseq,$refclass,$start,$stop,$enumerate,$types); # note: class is passed second here, unlike the unpacking order above; this call's result is the return value
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
655 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
656
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
657 =head2 classes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
658
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
659 Title : classes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
660 Usage : $db->classes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
661 Function: return list of landmark classes in database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
662 Returns : a list of classes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
663 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
664 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
665
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
666 This routine returns the list of reference classes known to the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
667 database, or empty if classes are not used by the database. Classes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
668 are distinct from types, being essentially qualifiers on the reference
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
669 namespaces.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
670
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
671 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
672
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
673 sub classes { # This implementation reports no landmark classes.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
674 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
675 return (); # always the empty list; NOTE(review): presumably overridden by adaptors that track classes -- confirm
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
676 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
677
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
678 =head2 segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
679
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
680 Title : segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
681 Usage : $db->segment(@args);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
682 Function: create a segment object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
683 Returns : segment object(s)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
684 Args : numerous, see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
685 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
686
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
687 This method generates a segment object, which is a Perl object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
688 subclassed from Bio::DB::GFF::Segment. The segment can be used to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
689 find overlapping features and the raw DNA.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
690
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
691 When making the segment() call, you specify the ID of a sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
692 landmark (e.g. an accession number, a clone or contig), and a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
693 positional range relative to the landmark. If no range is specified,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
694 then the entire extent of the landmark is used to generate the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
695 segment.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
696
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
697 You may also provide the ID of a "reference" sequence, which will set
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
698 the coordinate system and orientation used for all features contained
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
699 within the segment. The reference sequence can be changed later. If
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
700 no reference sequence is provided, then the coordinate system is based
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
701 on the landmark.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
702
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
703 Arguments:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
704
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
705 -name ID of the landmark sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
706
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
707 -class Database object class for the landmark sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
708 "Sequence" assumed if not specified. This is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
709 irrelevant for databases which do not recognize
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
710 object classes.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
711
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
712 -start Start of the segment relative to landmark. Positions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
713 follow standard 1-based sequence rules. If not specified,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
714 defaults to the beginning of the landmark.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
715
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
716 -end Stop of the segment relative to the landmark. If not specified,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
717 defaults to the end of the landmark.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
718
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
719 -stop Same as -end.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
720
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
721 -offset For those who prefer 0-based indexing, the offset specifies the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
722 position of the new segment relative to the start of the landmark.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
723
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
724 -length For those who prefer 0-based indexing, the length specifies the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
725 length of the new segment.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
726
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
727 -refseq Specifies the ID of the reference landmark used to establish the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
728 coordinate system for the newly-created segment.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
729
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
730 -refclass Specifies the class of the reference landmark, for those databases
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
731 that distinguish different object classes. Defaults to "Sequence".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
732
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
733 -absolute
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
734 Return features in absolute coordinates rather than relative to the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
735 parent segment.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
736
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
737 -nocheck Don't check the database for the coordinates and length of this
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
738 feature. Construct a segment using the indicated name as the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
739 reference, a start coordinate of 1, an undefined end coordinate,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
740 and a strand of +1.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
741
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
742 -force Same as -nocheck.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
743
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
744 -seq,-sequence,-sourceseq Aliases for -name.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
745
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
746 -begin,-end Aliases for -start and -stop
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
747
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
748 -off,-len Aliases for -offset and -length
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
749
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
750 -seqclass Alias for -class
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
751
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
752 Here's an example to explain how this works:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
753
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
754 my $db = Bio::DB::GFF->new(-dsn => 'dbi:mysql:human',-adaptor=>'dbi::mysql');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
755
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
756 If successful, $db will now hold the database accessor object. We now
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
757 try to fetch the fragment of sequence whose ID is A0000182 and class
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
758 is "Accession."
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
759
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
760 my $segment = $db->segment(-name=>'A0000182',-class=>'Accession');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
761
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
762 If successful, $segment now holds the entire segment corresponding to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
763 this accession number. By default, the sequence is used as its own
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
764 reference sequence, so its first base will be 1 and its last base will
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
765 be the length of the accession.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
766
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
767 Assuming that this sequence belongs to a longer stretch of DNA, say a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
768 contig, we can fetch this information like so:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
769
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
770 my $sourceseq = $segment->sourceseq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
771
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
772 and find the start and stop on the source like this:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
773
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
774 my $start = $segment->abs_start;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
775 my $stop = $segment->abs_stop;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
776
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
777 If we had another segment, say $s2, which is on the same contiguous
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
778 piece of DNA, we can pass that to the refseq() method in order to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
779 establish it as the coordinate reference point:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
780
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
781 $segment->refseq($s2);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
782
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
783 Now calling start() will return the start of the segment relative to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
784 the beginning of $s2, accounting for differences in strandedness:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
785
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
786 my $rel_start = $segment->start;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
787
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
788 IMPORTANT NOTE: This method can be used to return the segment spanned
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
789 by an arbitrary named annotation. However, if the annotation appears
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
790 at multiple locations on the genome, for example an EST that maps to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
791 multiple locations, then, provided that all locations reside on the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
792 same physical segment, the method will return a segment that spans the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
793 minimum and maximum positions. If the reference sequence occupies
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
794 ranges on different physical segments, then it returns them all in an
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
795 array context, and raises a "multiple segment exception" exception in
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
796 a scalar context.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
797
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
798 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
799
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
800 #'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
801
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Create segment object(s) for a landmark.
#
# The caller's arguments are normalised by setup_segment_args() and passed,
# together with this database as -factory, to Bio::DB::GFF::RelSegment->new().
# Each resulting segment is switched to absolute coordinates when this
# database's absolute() flag is true.  The final shape of the return value
# (nothing, one segment, a list, or an exception) is decided by
# _multiple_return_args().
sub segment {
  my $self = shift;

  my @found = Bio::DB::GFF::RelSegment->new(
    -factory => $self,
    $self->setup_segment_args(@_),
  );

  for my $seg (@found) {
    next unless $self->absolute;
    $seg->absolute(1);
  }

  return $self->_multiple_return_args(@found);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
812
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Decide what to hand back to the caller for a list of results:
#
#   no items       - bare return
#   exactly one    - that item, whatever the calling context
#   several items  - the whole list in list context; in scalar context the
#                    problem is reported through error() and a
#                    'multiple segment exception' is thrown
sub _multiple_return_args {
  my ($self, @items) = @_;

  return           unless @items;
  return $items[0] if @items == 1;
  return @items    if wantarray;    # more than one reference sequence

  # scalar context with several candidates: the caller cannot receive them all
  $self->error($items[0]->name,
               " has more than one reference sequence in database. Please call in a list context to retrieve them all.");
  $self->throw('multiple segment exception');
  return;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
830
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
831 # backward compatibility -- don't use!
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
832 # (deliberately undocumented too)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Legacy entry point: same as segment(), with -absolute => 1 appended to the
# normalised argument list.
sub abs_segment {
  my ($self, @spec) = @_;
  my @named = $self->setup_segment_args(@spec);
  return $self->segment(@named, -absolute => 1);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
837
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Normalise the arguments accepted by segment() into named-argument form.
#
#   (-name => ..., ...)     already named (first argument starts with '-'):
#                           returned unchanged
#   ($name, $start, $stop)  -> (-name => $name, -start => $start, -stop => $stop)
#   ($class, $name)         -> (-class => $class, -name => $name)
#   ($name)                 -> (-name => $name)
#
# Any other positional argument count yields an empty list.
sub setup_segment_args {
  my $self = shift;

  return @_ if defined $_[0] && $_[0] =~ /^-/;

  return (-name  => $_[0], -start => $_[1], -stop => $_[2]) if @_ == 3;
  return (-class => $_[0], -name  => $_[1])                 if @_ == 2;
  return (-name  => $_[0])                                  if @_ == 1;

  # Explicit empty return.  Without it the sub would fall off the end and
  # return the value of the last evaluated expression -- the false result of
  # "@_ == 1", i.e. '' -- as a one-element list, which would corrupt the
  # key/value argument list this is spliced into.
  return;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
845
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
846 =head2 features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
847
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
848 Title : features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
849 Usage : $db->features(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
850 Function: get all features, possibly filtered by type
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
851 Returns : a list of Bio::DB::GFF::Feature objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
852 Args : see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
853 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
854
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
855 This routine will retrieve features in the database regardless of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
856 position. It can be used to return all features, or a subset based on
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
857 their method and source.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
858
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
859 Arguments are as follows:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
860
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
861 -types List of feature types to return. Argument is an array
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
862 reference containing strings of the format "method:source"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
863
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
864 -merge Whether to apply aggregators to the generated features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
865
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
866 -rare Turn on optimizations suitable for a relatively rare feature type,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
867 where it makes more sense to filter by feature type first,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
868 and then by position.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
869
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
870 -attributes A hash reference containing attributes to match.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
871
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
872 -iterator Whether to return an iterator across the features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
873
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
874 -binsize A true value will create a set of artificial features whose
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
875 start and stop positions indicate bins of the given size, and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
876 whose scores are the number of features in the bin. The
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
877 class and method of the feature will be set to "bin",
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
878 its source to "method:source", and its group to "bin:method:source".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
879 This is a handy way of generating histograms of feature density.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
880
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
881 If -iterator is true, then the method returns a single scalar value
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
882 consisting of a Bio::SeqIO object. You can call next_seq() repeatedly
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
883 on this object to fetch each of the features in turn. If iterator is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
884 false or absent, then all the features are returned as a list.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
885
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
886 Currently aggregation is disabled when iterating over a series of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
887 features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
888
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
889 Types are indicated using the nomenclature "method:source". Either of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
890 these fields can be omitted, in which case a wildcard is used for the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
891 missing field. Type names without the colon (e.g. "exon") are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
892 interpreted as the method name and a source wild card. Regular
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
893 expressions are allowed in either field, as in: "similarity:BLAST.*".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
894
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
895 The -attributes argument is a hashref containing one or more attributes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
896 to match against:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
897
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
898 -attributes => { Gene => 'abc-1',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
899 Note => 'confirmed' }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
900
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
901 Attribute matching is simple string matching, and multiple attributes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
902 are ANDed together.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
903
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
904 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
905
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Retrieve features from the whole database, optionally filtered by type.
#
# Called either with named arguments (first argument starts with '-'), which
# are unpacked with rearrange(), or with a plain list of type names.  The
# request is forwarded to _features() as two hash references: the search
# specification and the retrieval options.
sub features {
  my $self = shift;

  my ($types, $automerge, $sparse, $iterator, $other);
  my $named_call = defined $_[0] && $_[0] =~ /^-/;

  if ($named_call) {
    my @order = (
      [qw(TYPE TYPES)],
      [qw(MERGE AUTOMERGE)],
      [qw(RARE SPARSE)],
      'ITERATOR',
    );
    # $other receives whatever rearrange() returns after the four named
    # slots; it is dereferenced as a hash below (presumably the unrecognised
    # options -- confirm against rearrange()).
    ($types, $automerge, $sparse, $iterator, $other) = rearrange(\@order, @_);
  }
  else {
    # positional call: every argument is a feature type
    $types = \@_;
  }

  # fall back to the database-wide automerge() setting when -merge was not given
  $automerge = $self->automerge unless defined $automerge;
  $other ||= {};

  my %search = (
    rangetype => 'contains',
    types     => $types,
  );
  my %options = (
    sparse    => $sparse,
    automerge => $automerge,
    iterator  => $iterator,
    %$other,
  );

  return $self->_features(\%search, \%options);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
935
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
936 =head2 get_seq_stream
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
937
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
938 Title : get_seq_stream
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
939 Usage : my $seqio = $self->get_seq_stream(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
940 Function: Performs a query and returns an iterator over it
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
941 Returns : a Bio::SeqIO stream capable of producing sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
942 Args : As in features()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
943 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
944
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
945 This routine takes the same arguments as features(), but returns a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
946 Bio::SeqIO::Stream-compliant object. Use it like this:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
947
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
948 $stream = $db->get_seq_stream('exon');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
949 while (my $exon = $stream->next_seq) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
950 print $exon,"\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
951 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
952
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
953 NOTE: This is also called get_feature_stream(), since that's what it
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
954 really does.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
955
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
956 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
957
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Run a features() query with -iterator => 1 appended.
#
# Named arguments (or no arguments at all) are passed through as they are;
# a plain list of type names is wrapped as -types => [...].
sub get_seq_stream {
  my $self = shift;

  my @query;
  if (!defined($_[0]) || $_[0] =~ /^-/) {
    @query = (@_, -iterator => 1);
  }
  else {
    @query = (-types => \@_, -iterator => 1);
  }

  return $self->features(@query);
}

# second name for the same method
*get_feature_stream = \&get_seq_stream;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
966
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
967 =head2 get_feature_by_name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
968
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
969 Title : get_feature_by_name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
970 Usage : $db->get_feature_by_name($class => $name)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
971 Function: fetch features by their name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
972 Returns : a list of Bio::DB::GFF::Feature objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
973 Args : the class and name of the desired feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
974 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
975
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
976 This method can be used to fetch a named feature from the database.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
977 GFF annotations are named using the group class and name fields, so
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
978 for features that belong to a group of size one, this method can be
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
979 used to retrieve that group (and is equivalent to the segment()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
980 method). Any Alias attributes are also searched for matching names.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
981
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
982 An alternative syntax allows you to search for features by name within
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
983 a circumscribed region:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
984
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
985 @f = $db->get_feature_by_name(-class => $class,-name=>$name,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
986 -ref => $sequence_name,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
987 -start => $start,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
988 -end => $end);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
989
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
990 This method may return zero, one, or several Bio::DB::GFF::Feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
991 objects.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
992
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
993 Aggregation is performed on features as usual.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
994
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
995 NOTE: At various times, this function was called fetch_group(),
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
996 fetch_feature(), fetch_feature_by_name() and segments(). These names
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
997 are preserved for backward compatibility.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
998
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
999 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1000
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Fetch features by group class and name.
#
# Call forms:
#   get_feature_by_name($name)            class defaults to default_class()
#   get_feature_by_name(-class => ..., -name => ..., -automerge => ...,
#                       -ref => ..., -start => ..., -end => ...)
#
# Matching rows are delivered by _feature_by_name() to a callback that turns
# each one into a feature with make_feature().  When automerge is on, the
# aggregators that accept the request (disaggregate() returns true) are then
# run over the collected features.  Returns the list of features.
sub get_feature_by_name {
  my $self = shift;

  my ($gclass, $gname, $automerge, $ref, $start, $end);
  if (@_ == 1) {
    $gclass = $self->default_class;
    $gname  = shift;
  }
  else {
    ($gclass, $gname, $automerge, $ref, $start, $end) =
      rearrange(['CLASS', 'NAME', 'AUTOMERGE',
                 ['REF', 'REFSEQ'],
                 'START', ['STOP', 'END'],
                ], @_);
    $gclass ||= $self->default_class;
  }
  $automerge = $self->automerge unless defined $automerge;

  # we need to refactor this... It's repeated code (see get_feature_by_attribute)...
  # NOTE(review): the original comment on the aggregation loop below said
  # "last aggregator gets first shot", and get_feature_by_attribute() builds
  # its list with unshift, but this list is built with push, so aggregators
  # run in their configured order here.  Order left unchanged -- confirm
  # which one is intended.
  my @aggregators;
  if ($automerge) {
    for my $aggregator ($self->aggregators) {
      push @aggregators, $aggregator if $aggregator->disaggregate([], $self);
    }
  }

  my %groups;    # cache the groups we create to avoid consuming too much unnecessary memory
  my $features = [];
  my $callback = sub { push @$features, $self->make_feature(undef, \%groups, @_) };

  # Only restrict the search to a region when a reference sequence was given.
  # Written as a ternary because "my $x = ... if COND" has undefined
  # behaviour when COND is false.
  my $location = defined $ref ? [$ref, $start, $end] : undef;
  $self->_feature_by_name($gclass, $gname, $location, $callback);

  warn "aggregating...\n" if $self->debug;
  for my $aggregator (@aggregators) {
    $aggregator->aggregate($features, $self);
  }

  return @$features;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1037
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Older names for get_feature_by_name() and segment(), kept so existing
# callers continue to work.  The first alias installs the code reference;
# the next two alias the whole glob, exactly as the chained assignment did.
*fetch_feature_by_name = \&get_feature_by_name;
*fetch_feature         = *fetch_feature_by_name;
*fetch_group           = *fetch_feature;
*segments              = \&segment;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1041
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1042 =head2 get_feature_by_target
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1043
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1044 Title : get_feature_by_target
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1045 Usage : $db->get_feature_by_target($class => $name)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1046 Function: fetch features by their similarity target
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1047 Returns : a list of Bio::DB::GFF::Feature objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1048 Args : the class and name of the desired feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1049 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1050
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1051 This method can be used to fetch a named feature from the database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1052 based on its similarity hit.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1053
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1054 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1055
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Thin wrapper: forwards all arguments to get_feature_by_name() and returns
# whatever it returns.
sub get_feature_by_target {
  my $self = shift;
  return $self->get_feature_by_name(@_);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1059
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1060 =head2 get_feature_by_attribute
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1061
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1062 Title : get_feature_by_attribute
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1063 Usage : $db->get_feature_by_attribute(attribute1=>value1,attribute2=>value2)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1064 Function: fetch features by combinations of attribute values
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1065 Returns : a list of Bio::DB::GFF::Feature objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1066 Args : a list of attribute=>value pairs, or a reference to a hash of them
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1067 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1068
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1069 This method can be used to fetch a set of features from the database.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1070 Attributes are a list of name=E<gt>value pairs. They will be logically
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1071 ANDED together.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1072
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1073 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1074
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Fetch features whose attributes match every name=>value pair given.
#
# Args    : either a flat list of attribute=>value pairs or a single
#           hash reference holding them
# Returns : the list of features built by make_feature() (after any
#           aggregation); in scalar context, the number of features
sub get_feature_by_attribute {
  my $self = shift;
  my %attributes = ref($_[0]) ? %{$_[0]} : @_;

  # Collect the aggregators that apply.  This duplicates logic found
  # elsewhere in this module and is a candidate for refactoring.
  my @aggregators;
  if ($self->automerge) {
    for my $aggregator ($self->aggregators) {
      next unless $aggregator->disaggregate([],$self);
      unshift @aggregators,$aggregator;   # reversed: last aggregator ends up first
    }
  }

  my %groups;     # cache the groups we create to avoid consuming too much unnecessary memory
  my @features;
  my $collect = sub { push @features,$self->make_feature(undef,\%groups,@_) };
  $self->_feature_by_attribute(\%attributes,$collect);

  warn "aggregating...\n" if $self->debug;
  for my $aggregator (@aggregators) {     # last aggregator gets first shot
    $aggregator->aggregate(\@features,$self);
  }

  return @features;
}

# more indecision...
*fetch_feature_by_attribute = \&get_feature_by_attribute;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1102
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1103 =head2 get_feature_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1104
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1105 Title : get_feature_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1106 Usage : $db->get_feature_by_id($id)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1107 Function: fetch features by feature ID
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1108 Returns : a Bio::DB::GFF::Feature object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1109 Args : the feature ID
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1110 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1111
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1112 This method can be used to fetch a feature from the database using its
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1113 ID. Not all GFF databases support IDs, so be careful with this.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1114
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1115 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1116
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Fetch features by feature ID.
#
# Args    : a list of IDs, or a single array reference of IDs
# Returns : in list context, every feature built for the IDs; otherwise
#           only the first one (undef when nothing was found)
sub get_feature_by_id {
  my $self = shift;
  my $ids  = \@_;
  $ids = $_[0] if ref($_[0]) eq 'ARRAY';

  my %groups;   # cache the groups we create to avoid consuming too much unnecessary memory
  my @found;
  my $collect = sub { push @found,$self->make_feature(undef,\%groups,@_) };
  $self->_feature_by_id($ids,'feature',$collect);

  return @found if wantarray;
  return $found[0];
}
*fetch_feature_by_id = \&get_feature_by_id;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1127
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1128 =head2 get_feature_by_gid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1129
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1130 Title : get_feature_by_gid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1131 Usage : $db->get_feature_by_gid($id)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1132 Function: fetch features by group ID
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1133 Returns : a Bio::DB::GFF::Feature object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1134 Args : the group ID
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1135 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1136
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1137 This method can be used to fetch a feature from the database using its
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1138 group ID. Not all GFF databases support IDs, so be careful with this.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1139
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1140 The group ID is often more interesting than the feature ID, since
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1141 groups can be complex objects containing subobjects.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1142
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1143 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1144
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Fetch features by group ID.
#
# Args    : a list of group IDs, or a single array reference of them
# Returns : in list context, every feature built for the IDs; otherwise
#           only the first one (undef when nothing was found)
sub get_feature_by_gid {
  my $self = shift;
  my $gids = \@_;
  $gids = $_[0] if ref($_[0]) eq 'ARRAY';

  my %groups;   # cache the groups we create to avoid consuming too much unnecessary memory
  my @found;
  my $collect = sub { push @found,$self->make_feature(undef,\%groups,@_) };
  $self->_feature_by_id($gids,'group',$collect);

  return @found if wantarray;
  return $found[0];
}
*fetch_feature_by_gid = \&get_feature_by_gid;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1155
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1156 =head2 delete_features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1157
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1158 Title : delete_features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1159 Usage : $db->delete_features(@ids_or_features)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1160 Function: delete one or more features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1161 Returns : count of features deleted
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1162 Args : list of features or feature ids
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1163 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1164
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1165 Pass this method a list of numeric feature ids or a set of features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1166 It will attempt to remove the features from the database and return a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1167 count of the features removed.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1168
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1169 NOTE: This method is also called delete_feature(). Also see
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1170 delete_groups().
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1171
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1172 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1173
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# delete_feature() is a synonym for delete_features().
*delete_feature = \&delete_features;

# Delete features given as feature objects and/or raw feature IDs.
#
# Args    : a mixed list of Bio::DB::GFF::Feature objects and IDs
# Returns : whatever _delete_features() returns; nothing (empty list /
#           undef) when no IDs were supplied
sub delete_features {
  my ($self, @features_or_ids) = @_;

  # Feature objects are reduced to their id(); anything else is taken
  # to be an ID already.
  my @ids;
  for my $item (@features_or_ids) {
    push @ids, UNIVERSAL::isa($item,'Bio::DB::GFF::Feature') ? $item->id : $item;
  }

  return unless @ids;
  return $self->_delete_features(@ids);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1183
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1184 =head2 delete_groups
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1185
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1186 Title : delete_groups
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1187 Usage : $db->delete_groups(@ids_or_features)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1188 Function: delete one or more feature groups
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1189 Returns : count of features deleted
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1190 Args : list of features or feature group ids
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1191 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1192
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1193 Pass this method a list of numeric group ids or a set of features. It
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1194 will attempt to recursively remove the features and ALL members of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1195 their group from the database. It returns a count of the number of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1196 features (not groups) deleted.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1197
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1198 NOTE: This method is also called delete_group(). Also see
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1199 delete_features().
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1200
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1201 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1202
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# delete_group() is a synonym for delete_groups().  The alias must name
# the real sub: pointing it at a misspelled name binds it to an
# undefined subroutine, so every call through the synonym would die.
*delete_group = \&delete_groups;

# Delete whole feature groups, given feature objects and/or group IDs.
#
# Args    : a mixed list of Bio::DB::GFF::Feature objects and group IDs
# Returns : whatever _delete_groups() returns; nothing (empty list /
#           undef) when no IDs were supplied
sub delete_groups {
  my $self = shift;
  my @features_or_ids = @_;

  # Feature objects are reduced to their group_id(); anything else is
  # taken to be a group ID already.
  my @ids = map {UNIVERSAL::isa($_,'Bio::DB::GFF::Feature') ? $_->group_id : $_} @features_or_ids;

  return unless @ids;
  return $self->_delete_groups(@ids);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1212
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1213 =head2 delete
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1214
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1215 Title : delete
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1216 Usage : $db->delete(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1217 Function: delete features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1218 Returns : count of features deleted -- if available
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1219 Args : numerous, see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1220 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1221
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1222 This method deletes all features that overlap the specified region or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1223 are of a particular type. If no arguments are provided and the -force
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1224 argument is true, then deletes ALL features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1225
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1226 Arguments:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1227
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1228 -name ID of the landmark sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1229
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1230 -ref ID of the landmark sequence (synonym for -name).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1231
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1232 -class Database object class for the landmark sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1233 "Sequence" assumed if not specified. This is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1234 irrelevant for databases which do not recognize
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1235 object classes.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1236
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1237 -start Start of the segment relative to landmark. Positions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1238 follow standard 1-based sequence rules. If not specified,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1239 defaults to the beginning of the landmark.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1240
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1241 -end Stop of the segment relative to the landmark. If not specified,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1242 defaults to the end of the landmark.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1243
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1244 -offset Zero-based addressing
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1245
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1246 -length Length of region
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1247
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1248 -type,-types Either a single scalar type to be deleted, or an
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1249 reference to an array of types.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1250
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1251 -force Force operation to be performed even if it would delete
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1252 entire feature table.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1253
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1254 -range_type Control the range type of the deletion. One of "overlaps" (default)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1255 "contains" or "contained_in"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1256
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1257 Examples:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1258
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1259 $db->delete(-type=>['intron','repeat:repeatMasker']); # remove all introns & repeats
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1260 $db->delete(-name=>'chr3',-start=>1,-end=>1000); # remove annotations on chr3 from 1 to 1000
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1261 $db->delete(-name=>'chr3',-type=>'exon'); # remove all exons on chr3
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1262
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1263 The short form of this call, as described in segment() is also allowed:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1264
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1265 $db->delete("chr3",1=>1000);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1266 $db->delete("chr3");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1267
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1268 IMPORTANT NOTE: This method only deletes features. It does *NOT*
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1269 delete the names of groups that contain the deleted features. Group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1270 IDs will be reused if you later load a feature with the same group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1271 name as one that was previously deleted.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1272
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1273 NOTE ON FEATURE COUNTS: The DBI-based versions of this call return the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1274 result code from the SQL DELETE operation. Some dbd drivers return the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1275 count of rows deleted, while others return 0E0. Caveat emptor.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1276
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1277 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1278
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Delete features by region and/or type.
#
# The arguments are first normalised by setup_segment_args() and then
# unpacked with rearrange() into: name (or ref), class, start, end (or
# stop), offset, length, type(s), force and range_type.
#
# Returns : whatever _delete() returns; nothing (empty list / undef)
#           when a landmark name was given but segment() found no
#           matching segment
# Throws  : via $self->throw when range_type is not a key of
#           %valid_range_types
sub delete {
  my $self = shift;
  my @args = $self->setup_segment_args(@_);
  my ($name,$class,$start,$end,$offset,$length,$type,$force,$range_type) =
    rearrange([['NAME','REF'],'CLASS','START',[qw(END STOP)],'OFFSET',
               'LENGTH',[qw(TYPE TYPES)],'FORCE','RANGE_TYPE'],@args);

  # Fill in coordinate defaults: start is derived from offset, and end
  # from start+length when only a length was supplied.
  $offset = 0         unless defined $offset;
  $start  = $offset+1 unless defined $start;
  if (!defined($end) and $length) {
    $end = $start+$length-1;
  }
  $class ||= $self->default_class;

  my $types = $self->parse_types($type);  # parse out list of types

  # The range type is validated case-insensitively, but is handed to
  # _delete() exactly as the caller spelled it.
  $range_type ||= 'overlaps';
  unless ($valid_range_types{lc $range_type}) {
    $self->throw("range type must be one of {".
                 join(',',keys %valid_range_types).
                 "}\n");
  }

  # Resolve the landmark into segments only when a non-empty name was
  # given; otherwise _delete() receives an empty segment list.
  my @segments;
  if (defined $name && $name ne '') {
    my @segment_args = (-name=>$name,-class=>$class);
    push @segment_args,(-start=>$start) if defined $start;
    push @segment_args,(-end  =>$end)   if defined $end;
    @segments = $self->segment(@segment_args);
    return unless @segments;
  }

  return $self->_delete({segments   => \@segments,
                         types      => $types,
                         range_type => $range_type,
                         force      => $force}
                       );
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1313
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1314 =head2 absolute
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1315
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1316 Title : absolute
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1317 Usage : $abs = $db->absolute([$abs]);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1318 Function: gets/sets absolute mode
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1319 Returns : current setting of absolute mode boolean
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1320 Args : new setting for absolute mode boolean
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1321 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1322
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1323 $db-E<gt>absolute(1) will turn on absolute mode for the entire database.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1324 All segments retrieved will use absolute coordinates by default,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1325 rather than relative coordinates. You can still set them to use
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1326 relative coordinates by calling $segment-E<gt>absolute(0).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1327
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1328 Note that this is not the same as calling abs_segment(); it continues
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1329 to allow you to look up groups that are not used directly as reference
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1330 sequences.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1331
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1332 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1333
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Get/set the database-wide absolute-coordinates flag.
#
# Args    : optional new value for the flag
# Returns : the value the flag held BEFORE this call (so a setter call
#           hands back the previous setting, not the new one)
sub absolute {
  my ($self, @new_value) = @_;
  my $previous = $self->{absolute};
  $self->{absolute} = $new_value[0] if @new_value;
  return $previous;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1340
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1341 =head2 strict_bounds_checking
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1342
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1343 Title : strict_bounds_checking
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1344 Usage : $flag = $db->strict_bounds_checking([$flag])
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1345 Function: gets/sets strict bounds checking
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1346 Returns : current setting of bounds checking flag
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1347 Args : new setting for bounds checking flag
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1348 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1349
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1350 This flag enables extra checks for segment requests that go beyond the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1351 ends of their reference sequences. If bounds checking is enabled,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1352 then retrieved segments will be truncated to their physical length,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1353 and their truncated() methods will return true.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1354
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1355 If the flag is off (the default), then the module will return segments
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1356 that appear to extend beyond their physical boundaries. Requests for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1357 features beyond the end of the segment will, however, return empty.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1358
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1359 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1360
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Get/set the strict bounds checking flag (stored under the 'strict' key).
#
# Args    : optional new value for the flag
# Returns : the value the flag held BEFORE this call (so a setter call
#           hands back the previous setting, not the new one)
sub strict_bounds_checking {
  my ($self, @new_value) = @_;
  my $previous = $self->{strict};
  $self->{strict} = $new_value[0] if @new_value;
  return $previous;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1367
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1368 =head2 get_Seq_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1369
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1370 Title : get_Seq_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1371 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1372 Function: Gets a Bio::Seq object by its name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1373 Returns : a Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1374 Args : the id (as a string) of a sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1375 Throws : "id does not exist" exception
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1376
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1377 NOTE: Bio::DB::RandomAccessI compliant method
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1378
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1379 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1380
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Return the first sequence produced by the stream that
# get_Stream_by_id() builds for the given ID.  Only the first argument
# after the invocant is used.
sub get_Seq_by_id {
  my ($self, $id) = @_;
  my $seq_stream = $self->get_Stream_by_id($id);
  return $seq_stream->next_seq;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1387
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1388
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1389 =head2 get_Seq_by_accession
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1390
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1391 Title : get_Seq_by_accession
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1392 Usage : $seq = $db->get_Seq_by_accession('AL12234')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1393 Function: Gets a Bio::Seq object by its accession
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1394 Returns : a Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1395 Args : the id (as a string) of a sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1396 Throws : "id does not exist" exception
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1397
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1398 NOTE: Bio::DB::RandomAccessI compliant method
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1399
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1400 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1401
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Return the first sequence produced by the stream that
# get_Stream_by_accession() builds for the given accession.  Only the
# first argument after the invocant is used.
sub get_Seq_by_accession {
  my ($self, $accession) = @_;
  my $seq_stream = $self->get_Stream_by_accession($accession);
  return $seq_stream->next_seq;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1408
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1409 =head2 get_Stream_by_acc ()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1410
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1411 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1412
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1413 =head2 get_Seq_by_acc
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1414
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1415 Title : get_Seq_by_acc
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1416 Usage : $seq = $db->get_Seq_by_acc('X77802');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1417 Function: Gets a Bio::Seq object by accession number
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1418 Returns : A Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1419 Args : accession number (as a string)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1420 Throws : "acc does not exist" exception
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1421
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1422 NOTE: Bio::DB::RandomAccessI compliant method
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1423
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1424 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1425
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Build a Bio::DB::GFF::ID_Iterator over the given names (lookup type
# 'name').
#
# Args    : a list of names, or a single reference holding them
# Returns : the newly constructed iterator
sub get_Stream_by_name {
  my ($self, @ids) = @_;

  # A leading reference is passed through as the ID list itself;
  # otherwise the (copied) argument list is used.
  my $id_list = \@ids;
  $id_list = $ids[0] if ref($ids[0]);

  return Bio::DB::GFF::ID_Iterator->new($self,$id_list,'name');
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1432
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1433 =head2 get_Stream_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1434
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1435 Title : get_Stream_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1436 Usage : $seq = $db->get_Stream_by_id(@ids);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1437 Function: Retrieves a stream of Seq objects given their ids
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1438 Returns : a Bio::SeqIO stream object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1439 Args : an array of unique ids/accession numbers, or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1440 an array reference
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1441
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1442 NOTE: This is also called get_Stream_by_batch()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1443
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1444 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1445
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Build an ID iterator keyed on 'feature'.
# Accepts either a flat list of ids or a single array reference; in the
# latter case the caller's array reference is passed through untouched.
# Whatever Bio::DB::GFF::ID_Iterator->new returns is returned as-is.
sub get_Stream_by_id {
  my ($self, @feature_ids) = @_;
  my $wanted = ref $feature_ids[0] ? $feature_ids[0] : \@feature_ids;
  return Bio::DB::GFF::ID_Iterator->new($self, $wanted, 'feature');
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1452
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1453 =head2 get_Stream_by_batch ()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1454
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1455 Title : get_Stream_by_batch
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1456 Usage : $seq = $db->get_Stream_by_batch(@ids);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1457 Function: Retrieves a stream of Seq objects given their ids
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1458 Returns : a Bio::SeqIO stream object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1459 Args : an array of unique ids/accession numbers, or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1460 an array reference
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1461
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1462 NOTE: This is the same as get_Stream_by_id().
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1463
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1464 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1465
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Typeglob alias: get_Stream_by_batch() is the same code as get_Stream_by_id().
1466 *get_Stream_by_batch = \&get_Stream_by_id;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1467
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1468
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1469 =head2 get_Stream_by_group ()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1470
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1471 Bioperl compatibility.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1472
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1473 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1474
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Build an ID iterator keyed on 'group'.
# Accepts either a flat list of ids or a single array reference; in the
# latter case the caller's array reference is passed through untouched.
# Whatever Bio::DB::GFF::ID_Iterator->new returns is returned as-is.
sub get_Stream_by_group {
  my ($self, @group_ids) = @_;
  my $wanted = ref $group_ids[0] ? $group_ids[0] : \@group_ids;
  return Bio::DB::GFF::ID_Iterator->new($self, $wanted, 'group');
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1481
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1482 =head2 all_seqfeatures
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1483
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1484 Title : all_seqfeatures
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1485 Usage : @features = $db->all_seqfeatures(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1486 Function: fetch all the features in the database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1487 Returns : an array of features, or an iterator
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1488 Args : See below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1489 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1490
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1491 This is equivalent to calling $db-E<gt>features() without any types, and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1492 will return all the features in the database. The -merge and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1493 -iterator arguments are recognized, and behave the same as described
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1494 for features().
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1495
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1496 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1497
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Fetch every feature by delegating to features() with no type filter.
# Only the -merge (alias -automerge) and -iterator options are forwarded,
# and each one only when the caller actually supplied a defined value.
sub all_seqfeatures {
  my $self = shift;
  my ($merge, $want_iterator)
    = rearrange([ [qw(MERGE AUTOMERGE)], 'ITERATOR' ], @_);

  # Same order as before: -merge first, then -iterator.
  my @forwarded = (
    (defined $merge         ? (-merge    => $merge)         : ()),
    (defined $want_iterator ? (-iterator => $want_iterator) : ()),
  );
  return $self->features(@forwarded);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1509
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1510 =head1 Creating and Loading GFF Databases
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1511
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1512 =head2 initialize
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1513
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1514 Title : initialize
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1515 Usage : $db->initialize(-erase=>$erase,-option1=>value1,-option2=>value2);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1516 Function: initialize a GFF database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1517 Returns : true if initialization successful
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1518 Args : a set of named parameters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1519 Status : Public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1520
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1521 This method can be used to initialize an empty database. It takes the following
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1522 named arguments:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1523
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1524 -erase A boolean value. If true the database will be wiped clean if it
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1525 already contains data.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1526
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1527 Other named arguments may be recognized by subclasses. They become database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1528 meta values that control various settable options.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1529
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1530 As a shortcut (and for backward compatibility) a single true argument
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1531 is the same as initialize(-erase=E<gt>1).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1532
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1533 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1534
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Initialize the database and write its meta values.
#
# Arguments are parsed with rearrange(): the ERASE option is handed to
# do_initialize(), and the second value returned by rearrange() is used
# as a hash reference of extra meta settings.  Returns nothing if
# do_initialize() reports failure, otherwise 1.
#
# NOTE: an earlier revision called do_initialize(1) up front when given a
# single true argument; that call was disabled because its purpose was
# unclear, and it is intentionally not reinstated here.
sub initialize {
  my $self = shift;

  my ($erase, $meta) = rearrange(['ERASE'], @_);
  $meta = {} unless $meta;

  # initialize (possibly erasing)
  $self->do_initialize($erase) or return;

  # default_meta_values() yields a flat tag => value list.  Upper-case the
  # tags (even positions) only; this is needed for case-insensitive SQL
  # databases.
  my @default = $self->default_meta_values;
  for my $idx (grep { !($_ % 2) } 0 .. $#default) {
    $default[$idx] = uc $default[$idx];
  }

  # Caller-supplied settings come last so they override the defaults.
  my %setting = (@default, %$meta);
  $self->meta($_ => $setting{$_}) for keys %setting;

  return 1;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1561
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1562
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1563 =head2 load_gff
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1564
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1565 Title : load_gff
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1566 Usage : $db->load_gff($file|$directory|$filehandle);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1567 Function: load GFF data into database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1568 Returns : count of records loaded
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1569 Args : a directory, a file, a list of files,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1570 or a filehandle
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1571 Status : Public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1572
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1573 This method takes a single overloaded argument, which can be any of:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1574
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1575 =over 4
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1576
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1577 =item 1. a scalar corresponding to a GFF file on the system
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1578
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1579 A pathname to a local GFF file. Any files ending with the .gz, .Z, or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1580 .bz2 suffixes will be transparently decompressed with the appropriate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1581 command-line utility.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1582
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1583 =item 2. an array reference containing a list of GFF files on the system
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1584
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1585 For example ['/home/gff/gff1.gz','/home/gff/gff2.gz']
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1586
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1587 =item 3. directory path
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1588
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1589 The indicated directory will be searched for all files ending in the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1590 suffixes .gff, .gff.gz, .gff.Z or .gff.bz2.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1591
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1592 =item 4. filehandle
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1593
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1594 An open filehandle from which to read the GFF data. Tied filehandles
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1595 now work as well.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1596
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1597 =item 5. a pipe expression
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1598
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1599 A pipe expression will also work. For example, a GFF file on a remote
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1600 web server can be loaded with an expression like this:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1601
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1602 $db->load_gff("lynx -dump -source http://stein.cshl.org/gff_test |");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1603
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1604 =back
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1605
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1606 If successful, the method will return the number of GFF lines
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1607 successfully loaded.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1608
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1609 NOTE: this method used to be called load(), but has been changed. The
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1610 old method name is also recognized.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1611
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1612 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1613
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Load GFF data from a file, directory, list of files, filehandle or pipe
# expression (defaults to the current directory).
#
# A tied filehandle is passed straight to do_load_gff().  Anything else is
# turned into an @ARGV list by setup_argv() and read through the magic ARGV
# handle.  setup_argv() may re-point STDIN, so STDIN is duplicated first
# and restored afterwards.
#
# Returns whatever do_load_gff() returns, or nothing (empty list / undef)
# when setup_argv() produces no input sources.
sub load_gff {
  my $self              = shift;
  my $file_or_directory = shift || '.';

  # The typeglob dereference below is only valid for handle-like
  # references, so the documented "array reference of file names" form
  # must be kept away from it.
  return $self->do_load_gff($file_or_directory)
    if ref($file_or_directory)
    && ref($file_or_directory) ne 'ARRAY'
    && tied *$file_or_directory;

  my $tied_stdin  = tied(*STDIN);
  my $saved_stdin = !$tied_stdin && open(SAVEIN,"<&STDIN");

  my ($loaded,$result);
  {
    local @ARGV = $self->setup_argv($file_or_directory,'gff');  # to play tricks with reader
    if (@ARGV) {
      $result = $self->do_load_gff('ARGV');
      $loaded = 1;
    }
  }

  # Restore STDIN (best effort, as before) and release the duplicate on
  # every path, including the "nothing to load" one.
  if ($saved_stdin) {
    open STDIN,"<&SAVEIN";
    close SAVEIN;
  }

  return $loaded ? $result : ();
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1627
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Typeglob alias: keep the old method name load() working as load_gff().
1628 *load = \&load_gff;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1629
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1630 =head2 load_fasta
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1631
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1632 Title : load_fasta
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1633 Usage : $db->load_fasta($file|$directory|$filehandle);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1634 Function: load FASTA data into database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1635 Returns : count of records loaded
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1636 Args : a directory, a file, a list of files,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1637 or a filehandle
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1638 Status : Public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1639
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1640 This method takes a single overloaded argument, which can be any of:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1641
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1642 =over 4
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1643
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1644 =item 1. scalar corresponding to a FASTA file on the system
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1645
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1646 A pathname to a local FASTA file. Any files ending with the .gz, .Z, or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1647 .bz2 suffixes will be transparently decompressed with the appropriate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1648 command-line utility.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1649
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1650 =item 2. array reference containing a list of FASTA files on the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1651 system
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1652
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1653 For example ['/home/fasta/genomic.fa.gz','/home/fasta/genomic.fa.gz']
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1654
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1655 =item 3. path to a directory
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1656
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1657 The indicated directory will be searched for all files ending in the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1658 suffixes .fa, .fa.gz, .fa.Z or .fa.bz2.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1659
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1660 =item 4. filehandle
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1661
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1662 An open filehandle from which to read the FASTA data.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1663
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1664 =item 5. pipe expression
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1665
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1666 A pipe expression will also work. For example, a FASTA file on a remote
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1667 web server can be loaded with an expression like this:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1668
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1669 $db->load_fasta("lynx -dump -source http://stein.cshl.org/fasta_test.fa |");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1670
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1671 =back
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1672
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1673 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1674
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Load FASTA data from a file, directory, list of files, filehandle or
# pipe expression (defaults to the current directory).
#
# A tied filehandle is passed straight to load_sequence().  Anything else
# is turned into an @ARGV list by setup_argv() (suffixes fa, dna, fasta)
# and read through the magic ARGV handle.  setup_argv() may re-point
# STDIN, so STDIN is duplicated first and restored afterwards.
#
# Returns whatever load_sequence() returns, or nothing (empty list /
# undef) when setup_argv() produces no input sources.
sub load_fasta {
  my $self              = shift;
  my $file_or_directory = shift || '.';

  # The typeglob dereference below is only valid for handle-like
  # references, so the documented "array reference of file names" form
  # must be kept away from it.
  return $self->load_sequence($file_or_directory)
    if ref($file_or_directory)
    && ref($file_or_directory) ne 'ARRAY'
    && tied *$file_or_directory;

  my $tied        = tied(*STDIN);
  my $saved_stdin = !$tied && open(SAVEIN,"<&STDIN");

  my ($loaded,$result);
  {
    local @ARGV = $self->setup_argv($file_or_directory,'fa','dna','fasta');  # to play tricks with reader
    if (@ARGV) {
      $result = $self->load_sequence('ARGV');
      $loaded = 1;
    }
  }

  # Restore STDIN (best effort, as before) and release the duplicate on
  # every path, including the "nothing to load" one.
  if ($saved_stdin) {
    open STDIN,"<&SAVEIN";
    close SAVEIN;
  }

  return $loaded ? $result : ();
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1688
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1689 =head2 load_sequence_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1690
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1691 Title : load_sequence_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1692 Usage : $db->load_sequence_string($id,$dna)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1693 Function: load a single DNA entry
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1694 Returns : true if successfully loaded
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1695 Args : a sequence ID and a raw sequence string (DNA, RNA, protein)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1696 Status : Public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1697
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1698 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1699
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Load one sequence given its accession/ID and raw sequence string.
# The sequence is first offered to insert_sequence_chunk() (which receives
# references to the running offset and to the sequence, so it may update
# both), then the current offset and sequence go to insert_sequence().
# Returns 1 when both calls report success, nothing otherwise.
sub load_sequence_string {
  my ($self, $acc, $seq) = @_;
  my $offset = 0;

  return unless $self->insert_sequence_chunk($acc, \$offset, \$seq);
  return unless $self->insert_sequence($acc, $offset, $seq);
  return 1;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1708
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Turn the overloaded "file or directory" argument into a list that can be
# assigned to @ARGV and read through the magic ARGV handle.
#
# Args    : $file_or_directory - array ref of file names, directory path,
#                                open filehandle, or a plain file name /
#                                pipe expression
#           @suffixes          - file suffixes searched for in directory mode
# Returns : list of @ARGV entries (empty if a directory has no matches)
# Throws  : "Can't dup STDIN" if STDIN cannot be pointed at a given handle
#
# Side effect: when given a filehandle other than STDIN itself, STDIN is
# reopened onto that handle's descriptor and '-' is returned; callers are
# expected to save and restore STDIN around this call.
sub setup_argv {
  my $self              = shift;
  my $file_or_directory = shift;
  my @suffixes          = @_;
  no strict 'refs';  # so that we can call fileno() on the argument

  my @argv;

  if (ref($file_or_directory) eq 'ARRAY') {
    # Checked first: fileno() is only meaningful for handles and must not
    # be applied to a plain list of file names.
    @argv = @$file_or_directory;
  }
  elsif (-d $file_or_directory) {
    @argv = map { glob("$file_or_directory/*.{$_,$_.gz,$_.Z,$_.bz2}") } @suffixes;
  }
  elsif (defined(my $fd = fileno($file_or_directory))) {
    # Test definedness, not truth: descriptor 0 (STDIN) is a valid handle.
    my $stdin_fd = fileno(STDIN);
    unless (defined $stdin_fd && $stdin_fd == $fd) {
      open STDIN,"<&=$fd" or $self->throw("Can't dup STDIN");
    }
    @argv = '-';
  }
  elsif (ref $file_or_directory) {
    # Legacy fallback for any other reference: dereference as an array.
    @argv = @$file_or_directory;
  }
  else {
    @argv = $file_or_directory;
  }

  # Compressed files become pipe expressions for the two-argument open
  # performed by ARGV.
  # NOTE(review): the file name is interpolated into a shell command
  # unquoted; names with spaces or shell metacharacters will misbehave.
  foreach (@argv) {
    if (/\.gz$/) {
      $_ = "gunzip -c $_ |";
    }
    elsif (/\.Z$/) {
      $_ = "uncompress -c $_ |";
    }
    elsif (/\.bz2$/) {
      $_ = "bunzip2 -c $_ |";
    }
  }

  return @argv;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1739
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1740 =head2 lock_on_load
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1741
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1742 Title : lock_on_load
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1743 Usage : $lock = $db->lock_on_load([$lock])
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1744 Function: set write locking during load
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1745 Returns : current value of lock-on-load flag
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1746 Args : new value of lock-on-load-flag
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1747 Status : Public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1748
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1749 This method is honored by some of the adaptors. If the value is true,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1750 the tables used by the GFF modules will be locked for writing during
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1751 loads and inaccessible to other processes.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1752
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1753 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1754
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Get/set the lock-on-load flag stored in $self->{lock}.
# Returns the value that was in effect before this call; when an argument
# is supplied, its first element becomes the new value.
sub lock_on_load {
  my ($self, @new) = @_;
  my $previous = $self->{lock};
  $self->{lock} = $new[0] if @new;
  return $previous;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1761
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1762 =head2 meta
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1763
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1764 Title : meta
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1765 Usage : $value = $db->meta($name [,$newval])
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1766 Function: get or set a meta variable
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1767 Returns : a string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1768 Args : meta variable name and optionally value
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1769 Status : abstract
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1770
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1771 Get or set a named metavalue for the database. Metavalues can be
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1772 used for database-specific settings.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1773
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1774 By default, this method does nothing!
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1775
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1776 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1777
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Base-class placeholder for getting/setting a named meta value.
# Accepts a name and an optional value, stores nothing, and returns
# nothing (empty list / undef).
sub meta {
  my ($self, $name, $value) = @_;
  return;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1783
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1784 =head2 default_meta_values
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1785
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1786 Title : default_meta_values
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1787 Usage : %values = $db->default_meta_values
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1788 Function: return the default meta values for the database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1789 Returns : a list of tag=>value pairs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1790 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1791 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1792
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1793 This method returns a list of tag=E<gt>value pairs that contain default
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1794 meta information about the database. It is invoked by initialize() to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1795 write out the default meta values. The base class version returns an
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1796 empty list.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1797
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1798 For things to work properly, meta value names must be UPPERCASE.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1799
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1800 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1801
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Base-class version: there are no default meta values, so return an
# empty list (undef in scalar context).
sub default_meta_values {
  my ($self) = @_;
  return;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1806
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1807
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1808 =head2 error
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1809
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1810 Title : error
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1811 Usage : $db->error( [$new error] );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1812 Function: read or set error message
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1813 Returns : error message
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1814 Args : an optional argument to set the error message
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1815 Status : Public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1816
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1817 This method can be used to retrieve the last error message. Errors
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1818 are not reset to empty by successful calls, so contents are only valid
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1819 immediately after an error condition has been detected.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1820
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1821 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1822
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1823 sub error {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1824 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1825 my $g = $self->{error};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1826 $self->{error} = join '',@_ if @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1827 $g;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1828 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1829
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1830 =head2 debug
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1831
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1832 Title : debug
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1833 Usage : $db->debug( [$flag] );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1834 Function: read or set debug flag
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1835 Returns : current value of debug flag
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1836 Args : new debug flag (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1837 Status : Public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1838
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1839 This method can be used to turn on debug messages. The exact nature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1840 of those messages depends on the adaptor in use.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1841
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1842 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1843
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1844 sub debug {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1845 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1846 my $g = $self->{debug};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1847 $self->{debug} = shift if @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1848 $g;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1849 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1850
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1851
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1852 =head2 automerge
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1853
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1854 Title : automerge
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1855 Usage : $db->automerge( [$new automerge] );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1856 Function: get or set automerge value
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1857 Returns : current value (boolean)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1858 Args : an optional argument to set the automerge value
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1859 Status : Public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1860
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1861 By default, this module will use the aggregators to merge groups into
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1862 single composite objects. This default can be changed to false by
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1863 calling automerge(0).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1864
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1865 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1866
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1867 sub automerge {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1868 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1869 my $g = $self->{automerge};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1870 $self->{automerge} = shift if @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1871 $g;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1872 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1873
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1874 =head2 attributes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1875
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1876 Title : attributes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1877 Usage : @attributes = $db->attributes($id,$name)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1878 Function: get the "attributes" on a particular feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1879 Returns : an array of strings
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1880 Args : feature ID, optional attribute name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1881 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1882
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1883 Some GFF version 2 files use the groups column to store a series of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1884 attribute/value pairs. In this interpretation of GFF, the first such
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1885 pair is treated as the primary group for the feature; subsequent pairs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1886 are treated as attributes. Two attributes have special meaning:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1887 "Note" is for backward compatibility and is used for unstructured text
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1888 remarks. "Alias" is considered as a synonym for the feature name.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1889
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1890 If no name is provided, then attributes() returns a flattened hash of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1891 attribute=E<gt>value pairs. This lets you do:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1892
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1893 %attributes = $db->attributes($id);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1894
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1895 Normally, attributes() will be called by the feature:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1896
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1897 @notes = $feature->attributes('Note');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1898
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1899 In a scalar context, attributes() returns the first value of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1900 attribute if a tag is present, otherwise a hash reference in which the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1901 keys are attribute names and the values are anonymous arrays
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1902 containing the values.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1903
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1904 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1905
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1906 sub attributes {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1907 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1908 my ($id,$tag) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1909 my @result = $self->do_attributes($id,$tag) or return;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1910 return @result if wantarray;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1911
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1912 # what to do in a scalar context
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1913 return $result[0] if $tag;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1914 my %result;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1915 while (my($key,$value) = splice(@result,0,2)) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1916 push @{$result{$key}},$value;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1917 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1918 return \%result;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1919 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1920
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1921 =head2 fast_queries
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1922
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1923 Title : fast_queries
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1924 Usage : $flag = $db->fast_queries([$flag])
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1925 Function: turn on and off the "fast queries" option
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1926 Returns : a boolean
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1927 Args : a boolean flag (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1928 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1929
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1930 The mysql database driver (and possibly others) supports a "fast" query
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1931 mode that caches results on the server side. This makes queries come
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1932 back faster, particularly when creating iterators. The downside is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1933 that while iterating, new queries will die with a "command synch"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1934 error. This method turns the feature on and off.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1935
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1936 For databases that do not support a fast query, this method has no
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1937 effect.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1938
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1939 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1940
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1941 # override this method in order to set the mysql_use_result attribute, which is an obscure
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1942 # but extremely powerful optimization for both performance and memory.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1943 sub fast_queries {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1944 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1945 my $d = $self->{fast_queries};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1946 $self->{fast_queries} = shift if @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1947 $d;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1948 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1949
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1950 =head2 add_aggregator
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1951
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1952 Title : add_aggregator
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1953 Usage : $db->add_aggregator($aggregator)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1954 Function: add an aggregator to the list
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1955 Returns : nothing
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1956 Args : an aggregator
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1957 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1958
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1959 This method will append an aggregator to the end of the list of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1960 registered aggregators. Three different argument types are accepted:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1961
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1962 1) a Bio::DB::GFF::Aggregator object -- will be added
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1963 2) a string in the form "aggregator_name{subpart1,subpart2,subpart3/main_method}"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1964 -- will be turned into a Bio::DB::GFF::Aggregator object (the /main_method
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1965 part is optional).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1966 3) a valid Perl token -- will be turned into a Bio::DB::GFF::Aggregator
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1967 subclass, where the token corresponds to the subclass name.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1968
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1969 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1970
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1971 sub add_aggregator {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1972 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1973 my $aggregator = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1974 my $list = $self->{aggregators} ||= [];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1975 if (ref $aggregator) { # an object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1976 @$list = grep {$_->get_method ne $aggregator->get_method} @$list;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1977 push @$list,$aggregator;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1978 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1979
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1980 elsif ($aggregator =~ /^(\w+)\{([^\/\}]+)\/?(.*)\}$/) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1981 my($agg_name,$subparts,$mainpart) = ($1,$2,$3);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1982 my @subparts = split /,\s*/,$subparts;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1983 my @args = (-method => $agg_name,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1984 -sub_parts => \@subparts);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1985 push @args,(-main_method => $mainpart) if $mainpart;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1986 warn "making an aggregator with (@args), subparts = @subparts" if $self->debug;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1987 push @$list,Bio::DB::GFF::Aggregator->new(@args);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1988 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1989
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1990 else {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1991 my $class = "Bio::DB::GFF::Aggregator::\L${aggregator}\E";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1992 eval "require $class";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1993 $self->throw("Unable to load $aggregator aggregator: $@") if $@;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1994 push @$list,$class->new();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1995 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1996 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1997
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1998 =head2 aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1999
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2000 Title : aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2001 Usage : $db->aggregators([@new_aggregators]);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2002 Function: retrieve list of aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2003 Returns : list of aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2004 Args : a list of aggregators to set (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2005 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2006
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2007 This method will get or set the list of aggregators assigned to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2008 the database. If 1 or more arguments are passed, the existing
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2009 set will be cleared.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2010
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2011 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2012
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2013 sub aggregators {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2014 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2015 my $d = $self->{aggregators};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2016 if (@_) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2017 $self->clear_aggregators;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2018 $self->add_aggregator($_) foreach @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2019 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2020 return unless $d;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2021 return @$d;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2022 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2023
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2024 =head2 clear_aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2025
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2026 Title : clear_aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2027 Usage : $db->clear_aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2028 Function: clears list of aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2029 Returns : nothing
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2030 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2031 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2032
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2033 This method will clear the aggregators stored in the database object.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2034 Use aggregators() or add_aggregator() to add some back.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2035
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2036 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2037
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2038 sub clear_aggregators { shift->{aggregators} = [] }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2039
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2040 =head1 Methods for use by Subclasses
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2041
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2042 The following methods are chiefly of interest to subclasses and are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2043 not intended for use by end programmers.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2044
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2045 =head2 abscoords
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2046
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2047 Title : abscoords
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2048 Usage : $db->abscoords($name,$class,$refseq)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2049 Function: finds position of a landmark in reference coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2050 Returns : ($ref,$class,$start,$stop,$strand)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2051 Args : name and class of landmark, optional reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2052 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2053
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2054 This method is called by Bio::DB::GFF::RelSegment to obtain the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2055 absolute coordinates of a sequence landmark. The arguments are the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2056 name and class of the landmark. If successful, abscoords() returns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2057 the ID of the reference sequence, its class, its start and stop
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2058 positions, and the orientation of the reference sequence's coordinate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2059 system ("+" for forward strand, "-" for reverse strand).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2060
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2061 If $refseq is present in the argument list, it forces the query to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2062 search for the landmark in a particular reference sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2063
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2064 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2065
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2066 sub abscoords {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2067 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2068 my ($name,$class,$refseq) = @_;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2069 $class ||= $self->{default_class};
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2070 $self->get_abscoords($name,$class,$refseq);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2071 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2072
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2073 =head1 Protected API
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2074
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2075 The following methods are not intended for public consumption, but are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2076 intended to be overridden/implemented by adaptors.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2077
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2078 =head2 default_aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2079
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2080 Title : default_aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2081 Usage : $db->default_aggregators;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2082 Function: retrieve list of aggregators
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2083 Returns : array reference containing list of aggregator names
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2084 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2085 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2086
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2087 This method (which is intended to be overridden by adaptors) returns a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2088 list of standard aggregators to be applied when no aggregators are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2089 specified in the constructor.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2090
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2091 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2092
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2093 sub default_aggregators {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2094 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2095 return ['processed_transcript','alignment'];
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2096 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2097
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2098 =head2 do_load_gff
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2099
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2100 Title : do_load_gff
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2101 Usage : $db->do_load_gff($handle)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2102 Function: load a GFF input stream
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2103 Returns : number of features loaded
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2104 Args : A filehandle.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2105 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2106
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2107 This method is called to load a GFF data stream. The method will read
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2108 GFF features from the supplied filehandle and load them into the database. On exit the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2109 method must return the number of features loaded.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2110
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2111 Note that the method is responsible for parsing the GFF lines. This
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2112 is to allow for differences in the interpretation of the "group"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2113 field, which are legion.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2114
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2115 You probably want to use load_gff() instead. It is more flexible
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2116 about the arguments it accepts.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2117
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2118 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2119
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2120 # load from the supplied filehandle
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2121 sub do_load_gff {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2122 my $self = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2123 my $io_handle = shift;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2124
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2125 local $self->{gff3_flag} = 0;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2126 $self->setup_load();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2127
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2128 my $fasta_sequence_id;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2129
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2130 while (<$io_handle>) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2131 chomp;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2132 $self->{gff3_flag}++ if /^\#\#gff-version\s+3/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2133 if (/^>(\S+)/) { # uh oh, sequence coming
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2134 $fasta_sequence_id = $1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2135 last;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2136 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2137 if (/^\#\#\s*sequence-region\s+(\S+)\s+(\d+)\s+(\d+)/i) { # header line
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2138 $self->load_gff_line(
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2139 {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2140 ref => $1,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2141 class => 'Sequence',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2142 source => 'reference',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2143 method => 'Component',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2144 start => $2,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2145 stop => $3,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2146 score => undef,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2147 strand => undef,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2148 phase => undef,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2149 gclass => 'Sequence',
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2150 gname => $1,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2151 tstart => undef,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2152 tstop => undef,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2153 attributes => [],
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2154 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2155 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2156 next;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2157 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2158
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2159 next if /^\#/;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2160 my ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group) = split "\t";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2161 next unless defined($ref) && defined($method) && defined($start) && defined($stop);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2162 foreach (\$score,\$strand,\$phase) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2163 undef $$_ if $$_ eq '.';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2164 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2165
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2166 my ($gclass,$gname,$tstart,$tstop,$attributes) = $self->split_group($group,$self->{gff3_flag});
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2167
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2168 # no standard way in the GFF file to denote the class of the reference sequence -- drat!
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2169 # so we invoke the factory to do it
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2170 my $class = $self->refclass($ref);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2171
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2172 # call subclass to do the dirty work
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2173 if ($start > $stop) {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2174 ($start,$stop) = ($stop,$start);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2175 if ($strand eq '+') {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2176 $strand = '-';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2177 } elsif ($strand eq '-') {
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2178 $strand = '+';
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2179 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2180 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2181 $self->load_gff_line({ref => $ref,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2182 class => $class,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2183 source => $source,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2184 method => $method,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2185 start => $start,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2186 stop => $stop,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2187 score => $score,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2188 strand => $strand,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2189 phase => $phase,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2190 gclass => $gclass,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2191 gname => $gname,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2192 tstart => $tstart,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2193 tstop => $tstop,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2194 attributes => $attributes}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2195 );
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2196 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2197
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2198 my $result = $self->finish_load();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2199 $result += $self->load_sequence($io_handle,$fasta_sequence_id)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2200 if defined $fasta_sequence_id;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2201 $result;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2202
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2203 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2204
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2205 =head2 load_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2206
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2207 Title : load_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2208 Usage : $db->load_sequence($handle [,$id])
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2209 Function: load a FASTA data stream
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2210 Returns : number of sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2211 Args : a filehandle and optionally the ID of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2212 the first sequence in the stream.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2213 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2214
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2215 You probably want to use load_fasta() instead. The $id argument is a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2216 hack used to switch from GFF loading to FASTA loading when load_gff()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2217 discovers FASTA data hiding at the bottom of the GFF file (as Artemis
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2218 does).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2219
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2220 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2221
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Load a FASTA data stream into the database.
#
# Args    : $io_handle - filehandle to read FASTA text from
#           $id        - optional ID of the first sequence; used when the
#                        caller has already consumed that record's ">"
#                        header line (FASTA data embedded in a GFF file)
# Returns : the number of ">" header lines read from $io_handle
#
# Residues are accumulated in $seq and handed to insert_sequence_chunk()
# after every line; whatever is still buffered when the next header (or
# end of input) is reached is stored with insert_sequence().
sub load_sequence {
  my $self      = shift;
  my $io_handle = shift;
  my $id        = shift;   # hack for GFF files that contain fasta data

  # $seq    - residues of the current record not yet stored
  # $offset - position of $seq within the current record
  # $loaded - number of header lines seen
  my ($seq,$offset,$loaded) = (undef,0,0);

  # Read into a lexical: while(<FH>) assigns to the global $_ without
  # localizing it, which would clobber the caller's $_.
  while (defined(my $line = <$io_handle>)) {
    chomp $line;
    if ($line =~ /^>(\S+)/) {
      my $next_id = $1;
      # Flush the previous record.  Test for a usable ID rather than for
      # truth, so that a sequence whose ID is "0" is not silently dropped.
      $self->insert_sequence($id,$offset,$seq)
        if defined $id && length $id;
      $id     = $next_id;
      $offset = 0;
      $seq    = '';
      $loaded++;
    } else {
      $seq .= $line;
      $self->insert_sequence_chunk($id,\$offset,\$seq);
    }
  }

  # flush whatever is left of the final record
  $self->insert_sequence($id,$offset,$seq)
    if defined $id && length $id;

  return $loaded;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2245
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Store as many full-size chunks of the buffered sequence as possible.
#
# Args    : $id      - sequence ID
#           $offsetp - reference to the running offset; advanced by the
#                      length of every chunk stored
#           $seqp    - reference to the sequence buffer; stored chunks are
#                      removed from its front
# Returns : always 1
#
# When dna_chunk_size() is false nothing is stored and both referenced
# values are left untouched.
sub insert_sequence_chunk {
  my ($self,$id,$offsetp,$seqp) = @_;

  my $chunk_size = $self->dna_chunk_size;
  return 1 unless $chunk_size;

  until (length($$seqp) < $chunk_size) {
    my $piece = substr($$seqp,0,$chunk_size);
    $self->insert_sequence($id,$$offsetp,$piece);
    $$offsetp += length $piece;
    # drop the stored piece from the front of the buffer
    substr($$seqp,0,$chunk_size,'');
  }

  return 1;   # the calling routine may expect success or failure
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2259
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Size of the pieces in which big DNA sequences are stored.  This default
# returns 0, which insert_sequence_chunk() treats as "do not chunk".
sub dna_chunk_size { 0 }
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2264
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Store one piece of sequence ($seq) for $id at position $offset.
# This base implementation only throws.
sub insert_sequence {
  my ($self,$id,$offset,$seq) = @_;
  return $self->throw('insert_sequence(): must be defined in subclass');
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2270
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Get or set the default class for reference points.
#
# Returns the value in effect *before* the call: the stored default_class
# if one exists, otherwise 'Sequence'.  When an argument is supplied it
# becomes the new stored value.
sub default_class {
  my ($self,@new_value) = @_;

  my $previous = 'Sequence';
  $previous = $self->{default_class} if exists $self->{default_class};

  $self->{default_class} = $new_value[0] if @new_value;
  return $previous;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2278
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Given the name of a reference sequence, return its class.
# The name is currently not consulted: the result is always
# whatever default_class() returns.
sub refclass {
  my ($self,$name) = @_;
  return $self->default_class;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2286
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2287 =head2 setup_load
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2288
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2289 Title : setup_load
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2290 Usage : $db->setup_load
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2291 Function: called before load_gff_line()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2292 Returns : void
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2293 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2294 Status : abstract
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2295
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2296 This abstract method gives subclasses a chance to do any
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2297 schema-specific initialization prior to loading a set of GFF records.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2298 The default implementation does nothing; subclasses may override it.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2299
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2300 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2301
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Hook run before a set of GFF records is loaded.
# The default does nothing and returns nothing.
sub setup_load {
  return;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2305
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2306 =head2 finish_load
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2307
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2308 Title : finish_load
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2309 Usage : $db->finish_load
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2310 Function: called after load_gff_line()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2311 Returns : number of records loaded
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2312 Args : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2313 Status : abstract
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2314
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2315 This method gives subclasses a chance to do any schema-specific
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2316 cleanup after loading a set of GFF records.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2317
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2318 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2319
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Hook run after a set of GFF records has been loaded.
# The default does nothing and returns nothing.
sub finish_load {
  return;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2323
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2324 =head2 load_gff_line
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2325
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2326 Title : load_gff_line
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2327 Usage : $db->load_gff_line(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2328 Function: called to load one parsed line of GFF
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2329 Returns : true if successfully inserted
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2330 Args : see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2331 Status : abstract
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2332
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2333 This abstract method is called once per line of the GFF and passed a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2334 hashref containing parsed GFF fields. The fields are:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2335
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2336 {ref => $ref,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2337 class => $class,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2338 source => $source,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2339 method => $method,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2340 start => $start,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2341 stop => $stop,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2342 score => $score,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2343 strand => $strand,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2344 phase => $phase,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2345 gclass => $gclass,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2346 gname => $gname,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2347 tstart => $tstart,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2348 tstop => $tstop,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2349 attributes => $attributes}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2350
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2351 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2352
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Load one parsed GFF line (passed as a hashref of fields).
# This base implementation only throws.
sub load_gff_line {
  my ($self) = @_;
  return $self->throw("load_gff_line(): must be implemented by an adaptor");
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2356
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2357
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2358 =head2 do_initialize
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2359
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2360 Title : do_initialize
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2361 Usage : $db->do_initialize([$erase])
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2362 Function: initialize and possibly erase database
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2363 Returns : true if successful
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2364 Args : optional erase flag
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2365 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2366
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2367 This method implements the initialize() method described above, and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2368 takes the same arguments.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2369
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2370 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2371
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Initialize (and possibly erase) the database.
# This base implementation only throws.
sub do_initialize {
  my ($self) = @_;
  return $self->throw('do_initialize(): must be implemented by an adaptor');
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2375
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2376 =head2 dna
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2377
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2378 Title : dna
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2379 Usage : $db->dna($id,$start,$stop,$class)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2380 Function: return the raw DNA string for a segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2381 Returns : a raw DNA string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2382 Args : id of the sequence, start and stop positions, and its class
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2383 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2384
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2385 This method is invoked by Bio::DB::GFF::Segment to fetch the raw DNA
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2386 sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2387
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2388 Arguments: -name sequence name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2389 -start start position
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2390 -stop stop position
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2391 -class sequence class
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2392
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2393 If start and stop are both undef, then the entire DNA is retrieved.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2394 So to fetch the whole dna, call like this:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2395
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2396 $db->dna($name_of_sequence);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2397
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2398 or like this:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2399
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2400 $db->dna(-name=>$name_of_sequence,-class=>$class_of_sequence);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2401
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2402 NOTE: you will probably prefer to create a Segment and then invoke its
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2403 dna() method.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2404
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2405 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2406
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Return the DNA string for the indicated region.
#
# Accepts either positional arguments ($id,$start,$stop,$class) or the
# named forms -name/-id/-ref/-refseq, -start, -stop/-end and -class.
# The values are passed to get_dna() unchanged, undefined start/stop
# included; get_dna() does the real work.
sub dna {
  my ($self,@args) = @_;

  my @arg_spec = (
    [qw(NAME ID REF REFSEQ)],
    'START',
    [qw(STOP END)],
    'CLASS',
  );
  my ($id,$start,$stop,$class) = rearrange(\@arg_spec,@args);

  return $self->get_dna($id,$start,$stop,$class);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2420
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Fetch the features related to a range by the requested range type.
#
# Named arguments: -range_type, -ref/-refseq, -class, -start, -stop/-end,
# -type/-types, -parent, -rare/-sparse, -merge/-automerge and -iterator.
# Anything else that rearrange() hands back as a trailing hashref is
# merged into the options passed to _features().
#
# Throws unless the lower-cased range type is a key of %valid_range_types.
# Returns whatever _features() returns.
sub features_in_range {
  my ($self,@args) = @_;

  my @arg_spec = (
    [qw(RANGE_TYPE)],
    [qw(REF REFSEQ)],
    'CLASS',
    'START',
    [qw(STOP END)],
    [qw(TYPE TYPES)],
    'PARENT',
    [qw(RARE SPARSE)],
    [qw(MERGE AUTOMERGE)],
    'ITERATOR',
  );
  my ($range_type,$refseq,$class,$start,$stop,$types,
      $parent,$sparse,$automerge,$iterator,$other) = rearrange(\@arg_spec,@args);

  $other ||= {};

  # an explicit -automerge wins; otherwise fall back on the database-wide
  # setting, but only when feature types were requested
  unless (defined $automerge) {
    $automerge = $types && $self->automerge;
  }

  unless ($valid_range_types{lc $range_type}) {
    my $allowed = join(',',keys %valid_range_types);
    $self->throw("range type must be one of {".$allowed."}\n");
  }

  my %search = (
    rangetype => lc $range_type,
    refseq    => $refseq,
    refclass  => $class,
    start     => $start,
    stop      => $stop,
    types     => $types,
  );
  my %options = (
    sparse    => $sparse,
    automerge => $automerge,
    iterator  => $iterator,
    %$other,    # caller-supplied extras override the three above
  );

  return $self->_features(\%search,\%options,$parent);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2457
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2458 =head2 get_dna
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2459
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2460 Title : get_dna
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2461 Usage : $db->get_dna($id,$start,$stop,$class)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2462 Function: get DNA for indicated segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2463 Returns : the dna string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2464 Args : sequence ID, start, stop and class
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2465 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2466
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2467 If start E<gt> stop and the sequence is nucleotide, then this method
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2468 should return the reverse complement. The sequence class may be
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2469 ignored by those databases that do not recognize different object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2470 types.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2471
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2472 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2473
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Fetch the DNA for ($id,$start,$stop,$class).
# This base implementation only throws.
sub get_dna {
  my ($self,$id,$start,$stop,$class) = @_;
  return $self->throw("get_dna() must be implemented by an adaptor");
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2479
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2480 =head2 get_features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2481
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2482 Title : get_features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2483 Usage : $db->get_features($search,$options,$callback)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2484 Function: get list of features for a region
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2485 Returns : count of number of features retrieved
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2486 Args : see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2487 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2488
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2489 The first argument is a hash reference containing search criteria for
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2490 retrieving features. It contains the following keys:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2491
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2492 rangetype One of "overlaps", "contains" or "contained_in". Indicates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2493 the type of range query requested.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2494
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2495 refseq ID of the landmark that establishes the absolute
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2496 coordinate system.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2497
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2498 refclass Class of this landmark. Can be ignored by implementations
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2499 that don't recognize such distinctions.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2500
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2501 start Start of the range, inclusive.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2502
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2503 stop Stop of the range, inclusive.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2504
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2505 types Array reference containing the list of annotation types
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2506 to fetch from the database. Each annotation type is an
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2507 array reference consisting of [source,method].
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2508
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2509 The second argument is a hash reference containing certain options
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2510 that affect the way information is retrieved:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2511
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2512 sort_by_group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2513 A flag. If true, means that the returned features should be
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2514 sorted by the group that they're in.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2515
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2516 sparse A flag. If true, means that the expected density of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2517 features is such that it will be more efficient to search
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2518 by type rather than by range. If it is taking a long
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2519 time to fetch features, give this a try.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2520
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2521 binsize A true value will create a set of artificial features whose
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2522 start and stop positions indicate bins of the given size, and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2523 whose scores are the number of features in the bin. The
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2524 class of the feature will be set to "bin", and its name to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2525 "method:source". This is a handy way of generating histograms
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2526 of feature density.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2527
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2528 The third argument, the $callback, is a code reference to which
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2529 retrieved features are passed. It is described in more detail below.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2530
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2531 This routine is responsible for getting arrays of GFF data out of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2532 database and passing them to the callback subroutine. The callback
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2533 does the work of constructing a Bio::DB::GFF::Feature object out of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2534 that data. The callback expects a list of 13 fields:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2535
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2536 $refseq The reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2537 $start feature start
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2538 $stop feature stop
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2539 $source feature source
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2540 $method feature method
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2541 $score feature score
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2542 $strand feature strand
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2543 $phase feature phase
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2544 $groupclass group class (may be undef)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2545 $groupname group ID (may be undef)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2546 $tstart target start for similarity hits (may be undef)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2547 $tstop target stop for similarity hits (may be undef)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2548 $feature_id A unique feature ID (may be undef)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2549
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2550 These fields are in the same order as the raw GFF file, with the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2551 exception that the group column has been parsed into group class and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2552 group name fields.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2553
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2554 The feature ID, if provided, is a unique identifier of the feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2555 line. The module does not depend on this ID in any way, but it is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2556 available via Bio::DB::GFF-E<gt>id() if wanted. In the dbi::mysql and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2557 dbi::mysqlopt adaptor, the ID is a unique row ID. In the acedb
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2558 adaptor it is not used.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2559
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2560 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2561
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Retrieve features matching $search, honouring $options, and pass each
# one to $callback.  This base implementation only throws.
sub get_features {
  my ($self,$search,$options,$callback) = @_;
  return $self->throw("get_features() must be implemented by an adaptor");
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2567
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2568
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2569 =head2 _feature_by_name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2570
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2571 Title : _feature_by_name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2572 Usage : $db->_feature_by_name($class,$name,$location,$callback)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2573 Function: get a list of features by name and class
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2574 Returns : count of number of features retrieved
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2575 Args : name of feature, class of feature, and a callback
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2576 Status : abstract
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2577
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2578 This method is used internally. The callback arguments are the same
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2579 as those used by make_feature(). This method must be overridden by
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2580 subclasses.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2581
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2582 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2583
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Abstract hook for looking features up by class and name.  Accepts
# ($class, $name, $location, $callback) and unconditionally raises an
# exception via throw(); adaptors must provide the real implementation.
sub _feature_by_name {
  my ($self, $class, $name, $location, $callback) = @_;
  return $self->throw("_feature_by_name() must be implemented by an adaptor");
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2589
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Abstract hook for looking features up by attribute.  Accepts
# ($attributes, $callback) and unconditionally raises an exception via
# throw(); adaptors must provide the real implementation.
#
# The exception text now names this method.  It previously said
# "_feature_by_name()", which pointed anyone debugging an incomplete
# adaptor at the wrong method.
sub _feature_by_attribute {
  my $self = shift;
  my ($attributes,$callback) = @_;
  $self->throw("_feature_by_attribute() must be implemented by an adaptor");
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2595
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2596 =head2 _feature_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2597
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2598 Title : _feature_by_id
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2599 Usage : $db->_feature_by_id($ids,$type,$callback)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2600 Function: get features based on their feature ID or group ID
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2601 Returns : count of number of features retrieved
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2602 Args : arrayref to feature IDs to fetch
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2603 Status : abstract
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2604
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2605 This method is used internally to fetch features either by their ID or
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2606 their group ID. $ids is an arrayref containing a list of IDs, $type is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2607 one of "feature" or "group", and $callback is a callback. The
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2608 callback arguments are the same as those used by make_feature(). This
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2609 method must be overridden by subclasses.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2610
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2611 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2612
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Abstract hook for fetching features by ID.  Accepts
# ($ids, $type, $callback) and unconditionally raises an exception via
# throw(); adaptors must provide the real implementation.
sub _feature_by_id {
  my ($self, $ids, $type, $callback) = @_;
  return $self->throw("_feature_by_id() must be implemented by an adaptor");
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2618
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2619 =head2 overlapping_features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2620
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2621 Title : overlapping_features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2622 Usage : $db->overlapping_features(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2623 Function: get features that overlap the indicated range
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2624 Returns : a list of Bio::DB::GFF::Feature objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2625 Args : see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2626 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2627
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2628 This method is invoked by Bio::DB::GFF::Segment-E<gt>features() to find
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2629 the list of features that overlap a given range. It is generally
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2630 preferable to create the Segment first, and then fetch the features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2631
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2632 This method takes a set of named arguments:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2633
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2634 -refseq ID of the reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2635 -class Class of the reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2636 -start Start of the desired range in refseq coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2637 -stop Stop of the desired range in refseq coordinates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2638 -types List of feature types to return. Argument is an array
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2639 reference containing strings of the format "method:source"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2640 -parent A parent Bio::DB::GFF::Segment object, used to create
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2641 relative coordinates in the generated features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2642 -rare Turn on an optimization suitable for a relatively rare feature type,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2643 where it will be faster to filter by feature type first
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2644 and then by position, rather than vice versa.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2645 -merge Whether to apply aggregators to the generated features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2646 -iterator Whether to return an iterator across the features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2647
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2648 If -iterator is true, then the method returns a single scalar value
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2649 consisting of a Bio::SeqIO object. You can call next_seq() repeatedly
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2650 on this object to fetch each of the features in turn. If iterator is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2651 false or absent, then all the features are returned as a list.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2652
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2653 Currently aggregation is disabled when iterating over a series of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2654 features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2655
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2656 Types are indicated using the nomenclature "method:source". Either of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2657 these fields can be omitted, in which case a wildcard is used for the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2658 missing field. Type names without the colon (e.g. "exon") are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2659 interpreted as the method name and a source wild card. Regular
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2660 expressions are allowed in either field, as in: "similarity:BLAST.*".
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2661
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2662 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2663
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Return the features that overlap the named region.  This is a thin
# wrapper: it delegates to features_in_range(), placing
# -range_type => 'overlaps' ahead of the caller's own arguments.
sub overlapping_features {
  my ($self, @args) = @_;
  return $self->features_in_range(-range_type => 'overlaps', @args);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2670
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2671 =head2 contained_features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2672
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2673 Title : contained_features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2674 Usage : $db->contained_features(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2675 Function: get features that are contained within the indicated range
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2676 Returns : a list of Bio::DB::GFF::Feature objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2677 Args : see overlapping_features()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2678 Status : public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2679
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2680 This call is similar to overlapping_features(), except that it only
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2681 retrieves features whose end points are completely contained within
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2682 the specified range.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2683
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2684 Generally you will want to fetch a Bio::DB::GFF::Segment object and
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2685 call its contained_features() method rather than call this directly.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2686
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2687 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2688
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Like overlapping_features(), but restricted to features lying
# completely inside the range (the original author notes this is
# usually much faster).  Delegates to features_in_range(), placing
# -range_type => 'contains' ahead of the caller's own arguments.
sub contained_features {
  my ($self, @args) = @_;
  return $self->features_in_range(-range_type => 'contains', @args);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2695
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2696 =head2 contained_in
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2697
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2698 Title : contained_in
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2699 Usage : @features = $s->contained_in(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2700 Function: get features that contain this segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2701 Returns : a list of Bio::DB::GFF::Feature objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2702 Args : see features()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2703 Status : Public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2704
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2705 This is identical in behavior to features() except that it returns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2706 only those features that completely contain the segment.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2707
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2708 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2709
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Delegates to features_in_range(), placing
# -range_type => 'contained_in' ahead of the caller's own arguments.
sub contained_in {
  my ($self, @args) = @_;
  return $self->features_in_range(-range_type => 'contained_in', @args);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2714
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2715 =head2 get_abscoords
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2716
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2717 Title : get_abscoords
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2718 Usage : $db->get_abscoords($name,$class,$refseq)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2719 Function: get the absolute coordinates of sequence with name & class
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2720 Returns : ($absref,$absstart,$absstop,$absstrand)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2721 Args : name and class of the landmark
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2722 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2723
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2724 Given the name and class of a genomic landmark, this function returns
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2725 a four-element array consisting of:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2726
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2727 $absref the ID of the reference sequence that contains this landmark
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2728 $absstart the position at which the landmark starts
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2729 $absstop the position at which the landmark stops
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2730 $absstrand the strand of the landmark, relative to the reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2731
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2732 If $refseq is provided, the function searches only within the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2733 specified reference sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2734
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2735 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2736
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Abstract hook for resolving a landmark to absolute coordinates.
# Accepts ($name, $class, $refseq) and unconditionally raises an
# exception via throw(); adaptors must provide the real implementation.
sub get_abscoords {
  my ($self, $name, $class, $refseq) = @_;
  return $self->throw("get_abscoords() must be implemented by an adaptor");
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2742
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2743 =head2 get_types
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2744
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2745 Title : get_types
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2746 Usage : $db->get_types($absref,$class,$start,$stop,$count)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2747 Function: get list of all feature types on the indicated segment
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2748 Returns : list or hash of Bio::DB::GFF::Typename objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2749 Args : see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2750 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2751
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2752 Arguments are:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2753
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2754 $absref the ID of the reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2755 $class the class of the reference sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2756 $start the position to start counting
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2757 $stop the position to end counting
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2758 $count a boolean indicating whether to count the number
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2759 of occurrences of each feature type
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2760
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2761 If $count is true, then a hash is returned. The keys of the hash are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2762 feature type names in the format "method:source" and the values are
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2763 the number of times a feature of this type overlaps the indicated
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2764 segment. Otherwise, the call returns a set of Bio::DB::GFF::Typename
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2765 objects. If $start or $stop are undef, then all features on the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2766 indicated segment are enumerated. If $absref is undef, then the call
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2767 returns all feature types in the database.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2768
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2769 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2770
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Abstract hook for enumerating feature types.  Accepts
# ($refseq, $class, $start, $stop, $count, $types) and unconditionally
# raises an exception via throw(); adaptors must provide the real
# implementation.
sub get_types {
  my ($self, $refseq, $class, $start, $stop, $count, $types) = @_;
  return $self->throw("get_types() must be implemented by an adaptor");
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2776
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2777 =head2 make_feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2778
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2779 Title : make_feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2780 Usage : $db->make_feature(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2781 Function: Create a Bio::DB::GFF::Feature object from string data
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2782 Returns : a Bio::DB::GFF::Feature object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2783 Args : see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2784 Status : internal
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2785
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2786 This takes 14 arguments (really!):
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2787
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2788 $parent A Bio::DB::GFF::RelSegment object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2789 $group_hash A hashref containing unique list of GFF groups
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2790 $refname The name of the reference sequence for this feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2791 $refclass The class of the reference sequence for this feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2792 $start Start of feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2793 $stop Stop of feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2794 $source Feature source field
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2795 $method Feature method field
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2796 $score Feature score field
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2797 $strand Feature strand
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2798 $phase Feature phase
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2799 $group_class Class of feature group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2800 $group_name Name of feature group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2801 $tstart For homologies, start of hit on target
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2802 $tstop Stop of hit on target
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2803
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2804 The $parent argument, if present, is used to establish relative
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2805 coordinates in the resulting Bio::DB::GFF::Feature object. This allows one
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2806 feature to generate a list of other features that are relative to its
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2807 coordinate system (for example, finding the coordinates of the second
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2808 exon relative to the coordinates of the first).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2809
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2810 The $group_hash allows the group_class/group_name strings to be turned
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2811 into rich database objects via the make_object() method (see above).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2812 Because these objects may be expensive to create, $group_hash is used
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2813 to uniquefy them. The index of this hash is the composite key
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2814 {$group_class,$group_name,$tstart,$tstop}. Values are whatever object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2815 is returned by the make_object() method.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2816
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2817 The remainder of the fields are taken from the GFF line, with the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2818 exception that "Target" features, which contain information about the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2819 target of a homology search, are parsed into their components.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2820
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2821 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2822
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Build a Bio::DB::GFF::Feature from the fields of one GFF record.
#
#   $parent      when it is a reference, the feature is created with
#                new_from_parent() and the source sequence is ignored;
#                otherwise new() is used with $self and $srcseq.
#   $group_hash  optional hash ref caching group objects already built
#                by make_object(), so each distinct
#                (class, name, tstart, tstop) is created only once.
#
# All remaining arguments come straight from the adaptor.  Returns
# nothing (undef in scalar context) when no source sequence is given.
sub make_feature {
  my ($self,
      $parent, $group_hash,     # supplied by the generic machinery
      $srcseq,                  # everything below is supplied by the adaptor
      $start, $stop,
      $source, $method,
      $score, $strand, $phase,
      $group_class, $group_name,
      $tstart, $tstop,
      $db_id, $group_id) = @_;

  # Called without a source sequence: hand back nothing.  On-the-fly
  # aggregation relies on this behaviour.
  return unless $srcseq;

  my $group;                    # stays undef when the record has no group
  if (defined $group_class && defined $group_name) {
    # Target coordinates are blanked when false so they can be used in
    # the cache key; the blanked values are also what the Feature
    # constructor receives below.
    $tstart ||= '';
    $tstop  ||= '';

    if ($group_hash) {
      # Same composite key as $hash->{$a,$b,$c,$d}: fields joined on $;
      my $key = join $;, $group_class, $group_name, $tstart, $tstop;
      $group_hash->{$key}
        ||= $self->make_object($group_class,$group_name,$tstart,$tstop);
      $group = $group_hash->{$key};
    }
    else {
      $group = $self->make_object($group_class,$group_name,$tstart,$tstop);
    }
  }

  # fix for some broken GFF files
  # unfortunately - has undesired side effects
  #  if (defined $tstart && defined $tstop && !defined $strand) {
  #    $strand = $tstart <= $tstop ? '+' : '-';
  #  }

  # Both constructors take the same trailing arguments.
  my @details = ($start, $stop,
                 $method, $source,
                 $score, $strand, $phase,
                 $group, $db_id, $group_id,
                 $tstart, $tstop);

  return ref $parent
    ? Bio::DB::GFF::Feature->new_from_parent($parent, @details)
    : Bio::DB::GFF::Feature->new($self, $srcseq, @details);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2877
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Streaming helper for on-the-fly aggregation.
#
# Arguments: ($accumulated_features, $parent, $aggregators, @fields)
#   $accumulated_features  array ref holding the features of the group
#                          currently being collected; modified in place
#   $parent                passed through to make_feature()
#   $aggregators           array ref of objects providing aggregate()
#   @fields                remaining make_feature() arguments; an empty
#                          list makes make_feature() return undef, which
#                          is treated as the end-of-stream signal
#
# Returns an array ref of finished features when a group is complete
# (or immediately for an ungrouped feature), and nothing otherwise.
sub make_aggregated_feature {
  my $self                 = shift;
  my ($accumulated_features,$parent,$aggregators) = splice(@_,0,3);
  my $feature = $self->make_feature($parent,undef,@_);

  # Ungrouped features cannot be aggregated; pass them straight through.
  return [$feature] if $feature && !$feature->group;

  # Flush when we have accumulated features and either:
  # (1) make_feature() returned undef, indicating the very end, or
  # (2) the current group is different from the previous one

  local $^W = 0;  # irritating uninitialized value warning in next statement
  if (@$accumulated_features &&
      (!defined($feature) || ($accumulated_features->[-1]->group ne $feature->group))) {
    # Aggregators run in the order given and may rewrite the list in place.
    # NOTE(review): the original comment here read "last aggregator gets
    # first shot"; presumably the caller passes the list already
    # reversed -- confirm.
    foreach my $aggregator (@$aggregators) {
      $aggregator->aggregate($accumulated_features,$self);
    }
    my @result = @$accumulated_features;
    @$accumulated_features = $feature ? ($feature) : ();
    return unless @result;
    return \@result ;
  }

  # Only real features are accumulated.  Previously an end-of-stream call
  # arriving with an empty accumulator pushed undef here, and a later call
  # would then die calling ->group on that undef.
  push @$accumulated_features,$feature if defined $feature;
  return;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2902
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2903 =head2 parse_types
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2904
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2905 Title : parse_types
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2906 Usage : $db->parse_types(@args)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2907 Function: parses list of types
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2908 Returns : an array ref containing ['method','source'] pairs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2909 Args : a list of types in 'method:source' form
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2910 Status : internal
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2911
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2912 This method takes an array of type names in the format "method:source"
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2913 and returns an array reference of ['method','source'] pairs. It will
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2914 also accept a single argument consisting of an array reference with
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2915 the list of type names.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2916
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2917 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2918
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Convert feature type names of the form "method:source" into an array
# ref of [method, source] pairs.
#
# Accepts either a flat list of names or a single array ref of names.
# An array ref whose first element is itself a reference is taken to be
# already parsed and is returned as-is.  With no arguments, or an
# undefined first argument, an empty array ref is returned.  Each name
# is split on the first ':' only, so a name without a colon yields a
# one-element pair.
sub parse_types {
  my ($self, @names) = @_;
  my $first = $names[0];
  return [] unless @names && defined $first;

  if (ref $first) {
    return $first if ref($first) eq 'ARRAY' && ref $first->[0];
    @names = @$first;
  }

  return [ map { [ split(':', $_, 2) ] } @names ];
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2928
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2929 =head2 make_match_sub
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2930
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2931 Title : make_match_sub
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2932 Usage : $db->make_match_sub($types)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2933 Function: creates a subroutine used for filtering features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2934 Returns : a code reference
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2935 Args : a list of parsed type names
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2936 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2937
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2938 This method is used internally to generate a code subroutine that will
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2939 accept or reject a feature based on its method and source. It takes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2940 an array of parsed type names in the format returned by parse_types(),
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2941 and generates an anonymous subroutine. The subroutine takes a single
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2942 Bio::DB::GFF::Feature object and returns true if the feature matches
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2943 one of the desired feature types, and false otherwise.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2944
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2945 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2946
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2947 # a subroutine that matches features indicated by list of types
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Build (and cache on the object) a predicate that accepts or rejects a
# feature according to its "method:source" type.
#   Args    : an array ref of [method, source] pairs, as produced by
#             parse_types().  An empty method matches any method; an empty
#             source matches any (or no) source.  Method and source are
#             used as regular-expression fragments, not literal strings.
#   Returns : a code ref taking one feature object.  It returns false when
#             called without a feature, otherwise the result of matching
#             $feature->type (case-insensitively, anchored at both ends)
#             against the combined pattern.  With no types at all, a sub
#             that always returns 1.
#   Throws  : via $self->throw() when the combined pattern does not compile.
sub make_match_sub {
    my $self  = shift;
    my $types = shift;

    return sub { 1 } unless ref $types && @$types;

    my @alternatives;
    for my $type (@$types) {
        my ($method, $source) = @$type;
        $method ||= '.*';
        $source = $source ? ":$source" : "(?::.+)?";
        push @alternatives, "${method}${source}";
    }
    my $expr = join '|', @alternatives;

    # One compiled matcher per distinct pattern.
    return $self->{match_subs}{$expr} if $self->{match_subs}{$expr};

    warn "match sub: /^($expr)\$/i\n" if $self->debug;

    # Compile the pattern once with qr// instead of generating Perl source
    # and string-eval'ing it: type names containing '/', '$' or '@' can then
    # neither break the generated code nor inject code of their own.
    my $regex = eval { qr/^($expr)$/i };
    $self->throw($@) unless defined $regex;

    my $compiled_sub = sub {
        my $feature = shift or return;
        return $feature->type =~ $regex;
    };
    return $self->{match_subs}{$expr} = $compiled_sub;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2975
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2976 =head2 make_object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2977
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2978 Title : make_object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2979 Usage : $db->make_object($class,$name,$start,$stop)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2980 Function: creates a feature object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2981 Returns : a feature object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2982 Args : see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2983 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2984
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2985 This method is called to make an object from the GFF "group" field.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2986 By default, all Target groups are turned into Bio::DB::GFF::Homol
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2987 objects, and everything else becomes a Bio::DB::GFF::Featname.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2988 However, adaptors are free to override this method to generate more
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2989 interesting objects, such as true BioPerl objects, or Acedb objects.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2990
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2991 Arguments are:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2992
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2993 $name database ID for object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2994 $class class of object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2995 $start for similarities, start of match inside object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2996 $stop for similarities, stop of match inside object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2997
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2998 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2999
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3000 # abstract call to turn a feature into an object, given its class and name
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Turn a GFF group (class + name, optionally a target range) into an object.
#   Args    : $class, $name, and for similarity targets $start and $stop
#   Returns : a Bio::DB::GFF::Homol when $start is defined and non-empty,
#             otherwise a Bio::DB::GFF::Featname
sub make_object {
    my ($self, $class, $name, $start, $stop) = @_;

    # A defined, non-empty start marks a similarity target.
    my $has_target_start = defined $start && length $start;
    if ($has_target_start) {
        return Bio::DB::GFF::Homol->new($self, $class, $name, $start, $stop);
    }

    return Bio::DB::GFF::Featname->new($class, $name);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3008
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3009
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3010 =head2 do_attributes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3011
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3012 Title : do_attributes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3013 Usage : $db->do_attributes($id [,$tag]);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3014 Function: internal method to retrieve attributes given an id and tag
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3015 Returns : a list of attribute values, or a flattened list of (tag=>value) pairs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3016 Args : a feature id and an attribute tag (optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3017 Status : protected
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3018
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3019 This method is overridden by subclasses in order to return a list of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3020 attributes. If called with a tag, returns the value of attributes of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3021 that tag type. If called without a tag, returns a flattened array of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3022 (tag=E<gt>value) pairs. A particular tag can be present multiple times.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3023
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3024 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3025
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Default attribute lookup: this base implementation knows no attributes.
#   Args    : a feature id and an optional attribute tag (both ignored here)
#   Returns : the empty list
sub do_attributes {
    my ($self, $id, $tag) = @_;
    return ();
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3031
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3032
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3033
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3034 =head1 Internal Methods
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3035
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3036 The following methods are internal to Bio::DB::GFF and are not
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3037 guaranteed to remain the same.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3038
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3039 =head2 _features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3040
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3041 Title : _features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3042 Usage : $db->_features($search,$options,$parent)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3043 Function: internal method
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3044 Returns : a list of Bio::DB::GFF::Feature objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3045 Args : see below
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3046 Status : internal
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3047
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3048 This is an internal method that is called by overlapping_features(),
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3049 contained_features() and features() to create features based on a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3050 parent segment's coordinate system. It takes three arguments, a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3051 search options hashref, an options hashref, and a parent segment.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3052
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3053 The search hashref contains the following keys:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3054
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3055 rangetype One of "overlaps", "contains" or "contained_in". Indicates
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3056 the type of range query requested.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3057 refseq reference sequence ID
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3058 refclass reference sequence class
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3059 start start of range
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3060 stop stop of range
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3061 types arrayref containing list of types in "method:source" form
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3062
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3063 The options hashref contains zero or more of the following keys:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3064
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3065 sparse turn on optimizations for a rare feature
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3066 automerge if true, invoke aggregators to merge features
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3067 iterator if true, return an iterator
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3068
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3069 The $parent argument is a scalar object containing a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3070 Bio::DB::GFF::RelSegment object or descendent.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3071
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3072 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3073
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3074 #'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3075
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Fetch features for a range query, optionally aggregating them.
#   Args    : $search  - hash ref describing the query; its start/stop are
#                        swapped in place when start > stop, and its 'types'
#                        entry is run through parse_types()
#             $options - hash ref; the 'automerge' and 'iterator' keys are
#                        consulted here, the rest is passed through
#             $parent  - passed on to make_feature() /
#                        make_aggregated_feature()
#   Returns : whatever get_features_iterator() returns when
#             $options->{iterator} is true; otherwise the list of collected
#             (and, with automerge, aggregated) features
sub _features {
    my ($self, $search, $options, $parent) = @_;

    # Put the range ends in ascending order.
    if (defined($search->{start}) && $search->{start} > $search->{stop}) {
        @{$search}{qw(start stop)} = @{$search}{qw(stop start)};
    }

    my $types            = $self->parse_types($search->{types});
    my @aggregated_types = @$types;    # working copy the aggregators may rewrite

    # Let each aggregator adjust the type list; those that report true from
    # disaggregate() are kept, most recently consulted first.
    my @aggregators;
    if ($options->{automerge}) {
        for my $aggregator ($self->aggregators) {
            $aggregator = $aggregator->clone if $options->{iterator};
            if ($aggregator->disaggregate(\@aggregated_types, $self)) {
                unshift @aggregators, $aggregator;
            }
        }
    }

    if ($options->{iterator}) {
        my @accumulated_features;
        my $callback;
        if ($options->{automerge}) {
            $callback = sub {
                $self->make_aggregated_feature(\@accumulated_features, $parent, \@aggregators, @_);
            };
        }
        else {
            $callback = sub { [ $self->make_feature($parent, undef, @_) ] };
        }

        my %iterator_search  = (%$search,  types         => \@aggregated_types);
        my %iterator_options = (%$options, sort_by_group => $options->{automerge});
        return $self->get_features_iterator(\%iterator_search, \%iterator_options, $callback);
    }

    my %groups;      # cache the groups we create to avoid consuming too much unnecessary memory
    my @features;

    my $callback = sub { push @features, $self->make_feature($parent, \%groups, @_) };
    my %query    = (%$search, types => \@aggregated_types);
    $self->get_features(\%query, $options, $callback);

    if ($options->{automerge}) {
        warn "aggregating...\n" if $self->debug;
        for my $aggregator (@aggregators) {    # last aggregator gets first shot
            warn "Aggregator $aggregator:\n" if $self->debug;
            $aggregator->aggregate(\@features, $self);
        }
    }

    return @features;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3126
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3127 =head2 get_features_iterator
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3128
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3129 Title : get_features_iterator
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3130 Usage : $db->get_features_iterator($search,$options,$callback)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3131 Function: get an iterator on a features query
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3132 Returns : a Bio::SeqIO object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3133 Args : as per get_features()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3134 Status : Public
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3135
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3136 This method takes the same arguments as get_features(), but returns an
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3137 iterator that can be used to fetch features sequentially, as per
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3138 Bio::SeqIO.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3139
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3140 Internally, this method is simply a front end to range_query().
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3141 The latter method constructs and executes the query, returning a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3142 statement handle. This routine passes the statement handle to the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3143 constructor for the iterator, along with the callback.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3144
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3145 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3146
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Placeholder for adaptors that support iterating over a features query.
#   Args    : $search, $options and $callback (unused in this implementation)
#   Throws  : always, via $self->throw()
sub get_features_iterator {
    my ($self, $search, $options, $callback) = @_;
    $self->throw('feature iteration is not implemented in this adaptor');
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3152
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3153 =head2 split_group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3154
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3155 Title : split_group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3156 Usage : $db->split_group($group_field,$gff3_flag)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3157 Function: parse GFF group field
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3158 Returns : ($gclass,$gname,$tstart,$tstop,$attributes)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3159 Args : the gff group column and a flag indicating gff3 compatibility
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3160 Status : internal
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3161
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3162 This is a method that is called by load_gff_line to parse out the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3163 contents of one or more group fields. It returns the class of the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3164 group, its name, the start and stop of the target, if any, and an
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3165 array reference containing any attributes that were stuck into the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3166 group field, in [attribute_name,attribute_value] format.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3167
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3168 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3169
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Break the GFF group column into its individual fields and hand them to
# the format-specific parser.
#   Args    : $group - the raw group column
#             $gff3  - true to use GFF3 rules, false for GFF2 rules
#   Returns : whatever _split_gff3_group() / _split_gff2_group() returns
sub split_group {
    my ($self, $group, $gff3) = @_;

    # GFF3: fields are separated by ';' or '&'.
    return $self->_split_gff3_group(split /[;&]/, $group) if $gff3;

    # GFF2: semicolons separate fields, but a backslash-escaped semicolon or
    # one inside a double-quoted value is data.  Swap such semicolons for the
    # $; placeholder character, split, then swap them back.
    $group =~ s/\\;/$;/g;
    1 while $group =~ s/( \"[^\"]*);([^\"]*\")/$1$;$2/;

    my @fields = split(/\s*;\s*/, $group);
    s/$;/;/g for @fields;

    return $self->_split_gff2_group(@fields);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3187
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3188 =head2 _split_gff2_group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3189
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3190 This is an internal method called by split_group().
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3191
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3192 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3193
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Interpret the already-separated fields of a GFF2 group column.
#   Args    : the individual "tag value" fields
#   Returns : ($gclass, $gname, $tstart, $tstop, \@attributes), where each
#             attribute is a [tag, value] pair.  $tstart/$tstop are only set
#             from a 'Target "Class:name" start stop' field.
sub _split_gff2_group {
    my ($self, @groups) = @_;

    my ($gclass, $gname, $tstart, $tstop, @attributes);

    for my $group (@groups) {
        my ($tag, $value) = $group =~ /^(\S+)(?:\s+(.+))?/;

        # Test for "missing" with defined/length rather than truthiness, so
        # that a legitimate tag or value of "0" is not mistaken for absent.
        $tag   = '' unless defined $tag;
        $value = '' unless defined $value;

        if ($value =~ /^\"(.+)\"$/) {    # remove quotes
            $value = $1;
        }
        $value =~ s/\\t/\t/g;
        $value =~ s/\\r/\r/g;

        my $have_group = defined $gclass && length $gclass
                      && defined $gname  && length $gname;

        # Any additional groups become part of the attributes list.
        # For historical reasons, the tag "Note" is treated as an
        # attribute, even if it is the only group.
        if ($tag eq 'Note' or $have_group) {
            push @attributes, [ $tag => $value ];
        }

        # For a Target the class name is embedded in the ID
        # ("Class:name"), followed by the start and stop of the match.
        elsif ($tag eq 'Target' && $group =~ /([^:\"\s]+):([^\"\s]+)/) {
            ($gclass, $gname) = ($1, $2);
            ($tstart, $tstop) = $group =~ / (\d+) (\d+)/;
        }

        # A bare tag with no value is recorded as a Note.
        elsif (!length $value) {
            push @attributes, [ Note => $tag ];    # e.g. "Confirmed_by_EST"
        }

        # Otherwise, the tag and value are the group class and name.
        else {
            ($gclass, $gname) = ($tag, $value);
        }
    }

    return ($gclass, $gname, $tstart, $tstop, \@attributes);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3238
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3239 =head2 _split_gff3_group
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3240
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3241 This is called internally from split_group().
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3242
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3243 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3244
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Interpret the already-separated "tag=value[,value...]" fields of a GFF3
# attributes column.
#   Args    : the individual fields
#   Returns : ($gclass, $gname, $tstart, $tstop, \@attributes).  Parent, ID
#             and Target set the class to 'Sequence' and take the first
#             value as the name (Target also supplies start and stop); all
#             remaining values become [tag, value] attribute pairs.
sub _split_gff3_group {
    my ($self, @groups) = @_;
    my ($gclass, $gname, $tstart, $tstop, @attributes);

    for my $group (@groups) {
        # Split on the first '=' only, so a value containing '=' stays whole.
        my ($tag, $value) = split /=/, $group, 2;
        next unless defined $tag;    # empty field, e.g. from ";;"

        $tag = unescape($tag);

        # A tag given without '=' has no values at all.
        my @values = defined $value
                   ? (map { unescape($_) } split(/,/, $value))
                   : ();

        if ($tag eq 'Parent' or $tag eq 'ID') {
            $gclass = 'Sequence';
            $gname  = shift @values;
        }
        elsif ($tag eq 'Target') {
            $gclass = 'Sequence';
            my $target = shift @values;
            # A missing target leaves name, start and stop undefined.
            ($gname, $tstart, $tstop) = split /\s+/, (defined $target ? $target : '');
        }

        push @attributes, [ $tag => $_ ] for @values;
    }
    return ($gclass, $gname, $tstart, $tstop, \@attributes);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3270
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3271 =head2 _delete_features(), _delete_groups(),_delete()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3272
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3273 Title : _delete_features(), _delete_groups(),_delete()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3274 Usage : $count = $db->_delete_features(@feature_ids)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3275 $count = $db->_delete_groups(@group_ids)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3276 $count = $db->_delete(\%delete_spec)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3277 Function: low-level feature/group deleter
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3278 Returns : count of groups removed
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3279 Args : list of feature or group ids removed
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3280 Status : for implementation by subclasses
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3281
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3282 These methods need to be implemented in adaptors. For
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3283 _delete_features and _delete_groups, the arguments are a list of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3284 feature or group IDs to remove. For _delete(), the argument is a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3285 hashref with the three keys 'segments', 'types' and 'force'. The
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3286 first contains an arrayref of Bio::DB::GFF::RelSegment objects to
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3287 delete (all FEATURES within the segment are deleted). The second
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3288 contains an arrayref of [method,source] feature types to delete. The
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3289 two are ANDed together. If 'force' has a true value, this forces the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3290 operation to continue even if it would delete all features.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3291
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3292 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3293
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Abstract hook for deleting features by ID.
#
# Args   : the invocant followed by a list of feature IDs.
# Throws : always -- this base implementation only reports that the
#          adaptor in use has not provided its own _delete_features().
sub _delete_features {
  my ($self, @feature_ids) = @_;
  $self->throw('_delete_features is not implemented in this adaptor');
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3299
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Abstract hook for deleting groups by ID.
#
# Args   : the invocant followed by a list of group IDs.
# Throws : always -- this base implementation only reports that the
#          adaptor in use has not provided its own _delete_groups().
sub _delete_groups {
  my ($self, @group_ids) = @_;
  $self->throw('_delete_groups is not implemented in this adaptor');
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3305
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Abstract hook for deleting by specification.
#
# Args   : the invocant followed by a single deletion-options argument
#          (the accompanying POD describes it as a hashref with the keys
#          'segments', 'types' and 'force').
# Throws : always -- this base implementation only reports that the
#          adaptor in use has not provided its own _delete().
sub _delete {
  my ($self, $delete_options) = @_;
  $self->throw('_delete is not implemented in this adaptor');
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3311
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Decode a URL-style escaped string and return the decoded copy.
#
# Every '+' is turned into a space, then every %XX sequence (two hex
# digits, either case) is replaced by the character with that code.
# The '+' translation runs first, so an escaped plus sign (%2B) decodes
# to a literal '+'.  Sequences that are not valid %XX pairs are left
# untouched.
sub unescape {
  my ($encoded) = @_;
  (my $decoded = $encoded) =~ tr/+/ /;
  $decoded =~ s/%([0-9a-fA-F]{2})/chr(hex($1))/eg;
  return $decoded;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3318
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3319
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
package Bio::DB::GFF::ID_Iterator;
use strict;

use Bio::Root::Root;
use vars '@ISA';
@ISA = 'Bio::Root::Root';

# Iterator that walks a list of identifiers and looks each one up in a
# database object on demand.
#
# Constructor.
# Args    : $db   - database object used for the lookups
#           $ids  - arrayref of identifiers; it is stored by reference and
#                   consumed by next_seq(), so the caller's array shrinks
#                   as the iterator advances
#           $type - one of 'name', 'feature' or 'group' (checked lazily,
#                   on the first call to next_seq())
# Returns : the new iterator object
sub new {
  my $class = shift;
  my ($db,$ids,$type) = @_;
  return bless {ids=>$ids,db=>$db,type=>$type},$class;
}

# Fetch the object for the next identifier in the list.
#
# Returns : whatever the database lookup returns for the next identifier,
#           or nothing (undef / empty list) once the list is exhausted.
# Throws  : if the iterator type is not 'name', 'feature' or 'group', or
#           if the lookup yields a false value ("id does not exist").
sub next_seq {
  my $self = shift;
  my $next = shift @{$self->{ids}};

  # Only an exhausted list ends the iteration.  Testing for truth here
  # would stop early on a defined-but-false identifier such as 0 and
  # silently drop every identifier queued after it.
  return unless defined $next;

  # An arrayref entry carries the constructor arguments for a Featname.
  my $name = ref($next) eq 'ARRAY' ? Bio::DB::GFF::Featname->new(@$next) : $next;

  my $type = $self->{type};
  my $db   = $self->{db};
  my $segment = $type eq 'name'    ? $db->segment($name)
              : $type eq 'feature' ? $db->fetch_feature_by_id($name)
              : $type eq 'group'   ? $db->fetch_feature_by_gid($name)
              : $self->throw("Bio::DB::GFF::ID_Iterator called to fetch an unknown type of identifier");

  $self->throw("id does not exist") unless $segment;
  return $segment;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3345
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3346 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3347
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3348 __END__
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3349
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3350 =head1 BUGS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3351
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3352 Features can only belong to a single group at a time. This must be
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3353 addressed soon.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3354
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3355 Start coordinate can be greater than stop coordinate for relative
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3356 addressing. This breaks strict BioPerl compatibility and must be
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3357 fixed.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3358
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3359 =head1 SEE ALSO
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3360
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3361 L<bioperl>,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3362 L<Bio::DB::GFF::RelSegment>,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3363 L<Bio::DB::GFF::Aggregator>,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3364 L<Bio::DB::GFF::Feature>,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3365 L<Bio::DB::GFF::Adaptor::dbi::mysqlopt>,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3366 L<Bio::DB::GFF::Adaptor::dbi::oracle>,
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3367 L<Bio::DB::GFF::Adaptor::memory>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3368
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3369 =head1 AUTHOR
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3370
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3371 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3372
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3373 Copyright (c) 2001 Cold Spring Harbor Laboratory.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3374
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3375 This library is free software; you can redistribute it and/or modify
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3376 it under the same terms as Perl itself.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3377
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3378 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3379