annotate variant_effect_predictor/Bio/DB/GFF.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: GFF.pm,v 1.71.2.2 2003/09/12 13:29:32 lstein Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Bio::DB::GFF -- Storage and retrieval of sequence annotation data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 use Bio::DB::GFF;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 # Open the sequence database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 my $db = Bio::DB::GFF->new( -adaptor => 'dbi::mysqlopt',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 -dsn => 'dbi:mysql:elegans',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 -fasta => '/usr/local/fasta_files'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 # fetch a 1 megabase segment of sequence starting at landmark "ZK909"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 my $segment = $db->segment('ZK909', 1 => 1000000);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 # pull out all transcript features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 my @transcripts = $segment->features('transcript');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 # for each transcript, total the length of the introns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 my %totals;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 for my $t (@transcripts) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 my @introns = $t->Intron;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 $totals{$t->name} += $_->length foreach @introns;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 # Sort the exons of the first transcript by position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 my @exons = sort {$a->start <=> $b->start} $transcripts[0]->Exon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 # Get a region 1000 bp upstream of first exon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 my $upstream = $exons[0]->segment(-1000,0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 # get its DNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 my $dna = $upstream->dna;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 # and get all curated polymorphisms inside it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 @polymorphisms = $upstream->contained_features('polymorphism:curated');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 # get all feature types in the database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 my @types = $db->types;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 # count all feature types in the segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 my %type_counts = $segment->types(-enumerate=>1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 # get an iterator on all curated features of type 'exon' or 'intron'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 my $iterator = $db->get_seq_stream(-type => ['exon:curated','intron:curated']);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 while (my $s = $iterator->next_seq) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 print $s,"\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 # find all transcripts annotated as having function 'kinase'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 my $iterator = $db->get_seq_stream(-type=>'transcript',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 -attributes=>{Function=>'kinase'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 while (my $s = $iterator->next_seq) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 print $s,"\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 Bio::DB::GFF provides fast indexed access to a sequence annotation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 database. It supports multiple database types (ACeDB, relational),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 and multiple schemas through a system of adaptors and aggregators.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 The following operations are supported by this module:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 - retrieving a segment of sequence based on the ID of a landmark
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 - retrieving the DNA from that segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 - finding all annotations that overlap with the segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 - finding all annotations that are completely contained within the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 - retrieving all annotations of a particular type, either within a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 segment, or globally
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 - conversion from absolute to relative coordinates and back again,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 using any arbitrary landmark for the relative coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 - using a sequence segment to create new segments based on relative
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 offsets
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 The data model used by Bio::DB::GFF is compatible with the GFF flat
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 file format (http://www.sanger.ac.uk/software/GFF). The module can
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 load a set of GFF files into the database, and serves objects that
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 have methods corresponding to GFF fields.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 The objects returned by Bio::DB::GFF are compatible with the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 SeqFeatureI interface, allowing their use by the Bio::Graphics and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 Bio::DAS modules.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 =head2 Auxiliary Scripts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 The bioperl distribution includes several scripts that make it easier
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 to work with Bio::DB::GFF databases. They are located in the scripts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 directory under a subdirectory named Bio::DB::GFF:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 =item bp_load_gff.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 This script will load a Bio::DB::GFF database from a flat GFF file of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 sequence annotations. Only the relational database version of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 Bio::DB::GFF is supported. It can be used to create the database from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 scratch, as well as to incrementally load new data.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 This script takes a --fasta argument to load raw DNA into the database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 as well. However, GFF databases do not require access to the raw DNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 for most of their functionality.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 bp_load_gff.pl also has a --upgrade option, which will perform a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 non-destructive upgrade of older schemas to newer ones.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 =item bp_bulk_load_gff.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 This script will populate a Bio::DB::GFF database from a flat GFF file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 of sequence annotations. Only the MySQL database version of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 Bio::DB::GFF is supported. It uses the "LOAD DATA INFILE" query in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 order to accelerate loading considerably; however, it can only be used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 for the initial load, and not for updates.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 This script takes a --fasta argument to load raw DNA into the database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 as well. However, GFF databases do not require access to the raw DNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 for most of their functionality.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 =item bp_fast_load_gff.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 This script is as fast as bp_bulk_load_gff.pl but uses Unix pipe
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 tricks to allow for incremental updates. It only supports the MySQL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 database version of Bio::DB::GFF and is guaranteed not to work on
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 non-Unix platforms.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 Arguments are the same as bp_load_gff.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 =item gadfly_to_gff.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 This script will convert the GFF-like format used by the Berkeley
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 Drosophila Sequencing project into a format suitable for use with this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 module.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 =item sgd_to_gff.pl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 This script will convert the tab-delimited feature files used by the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 Saccharomyces Genome Database into a format suitable for use with this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 module.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 =head2 GFF Fundamentals
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 The GFF format is a flat tab-delimited file, each line of which
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 corresponds to an annotation, or feature. Each line has nine columns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 and looks like this:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 Chr1 curated CDS 365647 365963 . + 1 Transcript "R119.7"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 The 9 columns are as follows:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 =item 1. reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 This is the ID of the sequence that is used to establish the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 coordinate system of the annotation. In the example above, the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 reference sequence is "Chr1".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 =item 2. source
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 The source of the annotation. This field describes how the annotation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 was derived. In the example above, the source is "curated" to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 indicate that the feature is the result of human curation. The names
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 and versions of software programs are often used for the source field,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 as in "tRNAScan-SE/1.2".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 =item 3. method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 The annotation method. This field describes the type of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 annotation, such as "CDS". Together the method and source describe
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 the annotation type.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 =item 4. start position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 The start of the annotation relative to the reference sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 =item 5. stop position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 The stop of the annotation relative to the reference sequence. Start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 is always less than or equal to stop.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 =item 6. score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 For annotations that are associated with a numeric score (for example,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 a sequence similarity), this field describes the score. The score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 units are completely unspecified, but for sequence similarities, it is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 typically percent identity. Annotations that don't have a score can
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 use "."
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 =item 7. strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 For those annotations which are strand-specific, this field is the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 strand on which the annotation resides. It is "+" for the forward
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 strand, "-" for the reverse strand, or "." for annotations that are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 not stranded.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 =item 8. phase
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 For annotations that are linked to proteins, this field describes the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 phase of the annotation on the codons. It is a number from 0 to 2, or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 "." for features that have no phase.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 =item 9. group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 GFF provides a simple way of generating annotation hierarchies ("is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 composed of" relationships) by providing a group field. The group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 field contains the class and ID of an annotation which is the logical
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 parent of the current one. In the example given above, the group is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 the Transcript named "R119.7".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 The group field is also used to store information about the target of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 sequence similarity hits, and miscellaneous notes. See the next
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 section for a description of how to describe similarity targets.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 The format of the group fields is "Class ID" with a single space (not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 a tab) separating the class from the ID. It is VERY IMPORTANT to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 follow this format, or grouping will not work properly.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 The sequences used to establish the coordinate system for annotations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 can correspond to sequenced clones, clone fragments, contigs or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 super-contigs. Thus, this module can be used throughout the lifecycle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 of a sequencing project.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 In addition to a group ID, the GFF format allows annotations to have a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 group class. For example, in the ACeDB representation, RNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 interference experiments have a class of "RNAi" and an ID that is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 unique among the RNAi experiments. Since not all databases support
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 this notion, the class is optional in all calls to this module, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 defaults to "Sequence" when not provided.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 Double-quotes are sometimes used in GFF files around components of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 group field. Strictly, this is only necessary if the group name or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 class contains whitespace.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 =head2 Making GFF files work with this module
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 Some annotations do not need to be individually named. For example,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 it is probably not useful to assign a unique name to each ALU repeat
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 in a vertebrate genome. Others, such as predicted genes, correspond
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 to named biological objects; you probably want to be able to fetch the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 positions of these objects by referring to them by name.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 To accommodate named annotations, the GFF format places the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 class and name in the group field. The name identifies the object,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 and the class prevents similarly-named objects, for example clones and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 sequences, from colliding.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 A named object is shown in the following excerpt from a GFF file:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 Chr1 curated transcript 939627 942410 . + . Transcript Y95B8A.2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 This object is a predicted transcript named Y95B8A.2. In this case,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 the group field is used to identify the class and name of the object,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 even though no other annotation belongs to that group.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 It now becomes possible to retrieve the region of the genome covered
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 by transcript Y95B8A.2 using the segment() method:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 $segment = $db->segment(-class=>'Transcript',-name=>'Y95B8A.2');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 It is not necessary for the annotation's method to correspond to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 object class, although this is commonly the case.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 As explained above, each annotation in a GFF file refers to a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 reference sequence. It is important that each reference sequence also
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 be identified by a line in the GFF file. This allows the Bio::DB::GFF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 module to determine the length and class of the reference sequence,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 and makes it possible to do relative arithmetic.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 For example, if "Chr1" is used as a reference sequence, then it should
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 have an entry in the GFF file similar to this one:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 Chr1 assembly chromosome 1 14972282 . + . Sequence Chr1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 This indicates that the reference sequence named "Chr1" has length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 14972282 bp, method "chromosome" and source "assembly". In addition,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 as indicated by the group field, Chr1 has class "Sequence" and name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 "Chr1".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 The object class "Sequence" is used by default when the class is not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 specified in the segment() call. This allows you to use a shortcut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 form of the segment() method:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 $segment = $db->segment('Chr1'); # whole chromosome
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 $segment = $db->segment('Chr1',1=>1000); # first 1000 bp
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 For your convenience, if, during loading a GFF file, Bio::DB::GFF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 encounters a line like the following:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 ##sequence-region Chr1 1 14972282
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 It will automatically generate the following entry:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 Chr1 reference Component 1 14972282 . + . Sequence Chr1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 This is sufficient to use Chr1 as a reference point.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 The ##sequence-region line is frequently found in the GFF files
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 distributed by annotation groups.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 =head2 Sequence alignments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 There are two cases in which an annotation indicates the relationship
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 between two sequences. The first case is a similarity hit, where the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 annotation indicates an alignment. The second case is a map assembly,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 in which the annotation indicates that a portion of a larger sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 is built up from one or more smaller ones.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 Both cases are indicated by using the B<Target> tag in the group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 field. For example, a typical similarity hit will look like this:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 Chr1 BLASTX similarity 76953 77108 132 + 0 Target Protein:SW:ABL_DROME 493 544
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 The group field contains the Target tag, followed by an identifier for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 the biological object referred to. The GFF format uses the notation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 I<Class>:I<Name> for the biological object, and even though this is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 stylistically inconsistent, that's the way it's done. The object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 identifier is followed by two integers indicating the start and stop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 of the alignment on the target sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 Unlike the main start and stop columns, it is possible for the target
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 start to be greater than the target end. The previous example
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 indicates that the section of Chr1 from 76,953 to 77,108 aligns to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 the protein SW:ABL_DROME starting at position 493 and extending to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 position 544.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 A similar notation is used for sequence assembly information as shown
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 in this example:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 Chr1 assembly Link 10922906 11177731 . . . Target Sequence:LINK_H06O01 1 254826
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 LINK_H06O01 assembly Cosmid 32386 64122 . . . Target Sequence:F49B2 6 31742
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 This indicates that the region between bases 10922906 and 11177731 of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 Chr1 is composed of LINK_H06O01 from bp 1 to bp 254826. The region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 of LINK_H06O01 between 32386 and 64122 is, in turn, composed of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 bases 6 to 31742 of cosmid F49B2.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 =head2 Attributes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 While not intended to serve as a general-purpose sequence database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 (see bioperl-db for that), GFF allows you to tag features with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 arbitrary attributes. Attributes appear in the Group field following
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 the initial class/name pair. For example:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 Chr1 cur trans 939 942 . + . Transcript Y95B8A.2 ; Gene sma-3 ; Alias sma3
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 This line tags the feature named Transcript Y95B8A.2 as being "Gene"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 named sma-3 and having the Alias "sma3". Features having these
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 attributes can be looked up using the fetch_feature_by_attribute() method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 Two attributes have special meaning: "Note" is for backward
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 compatibility and is used for unstructured text remarks. "Alias" is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 considered as a synonym for the feature name and will be consulted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 when looking up a feature by its name.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 =head2 Adaptors and Aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 This module uses a system of adaptors and aggregators in order to make
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 it adaptable to use with a variety of databases.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 =item Adaptors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 The core of the module handles the user API, annotation coordinate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 arithmetic, and other common issues. The details of fetching
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 information from databases are handled by an adaptor, which is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 specified during Bio::DB::GFF construction. The adaptor encapsulates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 database-specific information such as the schema, user authentication
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 and access methods.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 Currently there are two adaptors: 'dbi::mysql' and 'dbi::mysqlopt'.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 The former is an interface to a simple Mysql schema. The latter is an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 optimized version of dbi::mysql which uses a binning scheme to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 accelerate range queries and the Bio::DB::Fasta module for rapid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 retrieval of sequences. Note the double-colon between the words.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 =item Aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 The GFF format uses a "group" field to indicate aggregation properties
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 of individual features. For example, a set of exons and introns may
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 share a common transcript group, and multiple transcripts may share
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 the same gene group.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 Aggregators are small modules that use the group information to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 rebuild the hierarchy. When a Bio::DB::GFF object is created, you
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 indicate that it should use a set of one or more aggregators. Each
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 aggregator provides a new composite annotation type. Before the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 database query is generated each aggregator is called to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 "disaggregate" its annotation type into a list of component types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 contained in the database. After the query is generated, each
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 aggregator is called again in order to build composite annotations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 from the returned components.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 For example, during disaggregation, the standard
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 "processed_transcript" aggregator generates a list of component
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 feature types including "UTR", "CDS", and "polyA_site". Later, it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 aggregates these features into a set of annotations of type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 "processed_transcript".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 During aggregation, the list of aggregators is called in reverse
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 order. This allows aggregators to collaborate to create multi-level
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 structures: the transcript aggregator assembles transcripts from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 introns and exons; the gene aggregator then assembles genes from sets
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 of transcripts.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 Three default aggregators are provided:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 transcript assembles transcripts from features of type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 exon, CDS, 5'UTR, 3'UTR, TSS, and PolyA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 clone assembles clones from Clone_left_end, Clone_right_end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 and Sequence features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 alignment assembles gapped alignments from features of type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 "similarity".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 In addition, this module provides the optional "wormbase_gene"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 aggregator, which accommodates the WormBase representation of genes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 This aggregator aggregates features of method "exon", "CDS", "5'UTR",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 "3'UTR", "polyA" and "TSS" into a single object. It also expects to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 find a single feature of type "Sequence" that spans the entire gene.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 The existing aggregators are easily customized.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 Note that aggregation will not occur unless you specifically request
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 the aggregation type. For example, this call:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 @features = $segment->features('alignment');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 will generate an array of aggregated alignment features. However,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 this call:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 @features = $segment->features();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 will return a list of unaggregated similarity segments.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 For more information, see the manual pages for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 Bio::DB::GFF::Aggregator::processed_transcript, Bio::DB::GFF::Aggregator::clone,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 etc.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 =head1 API
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 The following is the API for Bio::DB::GFF.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 package Bio::DB::GFF;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 use Bio::DB::GFF::Util::Rearrange;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 use Bio::DB::GFF::RelSegment;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 use Bio::DB::GFF::Feature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 use Bio::DB::GFF::Aggregator;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 use Bio::DasI;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 use Bio::Root::Root;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 use vars qw(@ISA $VERSION);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 @ISA = qw(Bio::Root::Root Bio::DasI);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 $VERSION = '1.2003';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Lookup table of range-relationship keywords: each of 'overlaps',
# 'contains' and 'contained_in' maps to a true value, so membership can be
# tested with a plain hash lookup.
# NOTE(review): presumably consulted when validating a range-type argument
# elsewhere in this file -- confirm against the callers.
my %valid_range_types = (overlaps => 1,
			 contains => 1,
			 contained_in => 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 =head1 Querying GFF Databases
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 Title : new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 Usage : my $db = new Bio::DB::GFF(@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 Function: create a new Bio::DB::GFF object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 Returns : new Bio::DB::GFF object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 Args : lists of adaptors and aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 Status : Public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 These are the arguments:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 -adaptor Name of the adaptor module to use. If none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 provided, defaults to "dbi::mysqlopt".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 -aggregator Array reference to a list of aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 to apply to the database. If none provided,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 defaults to ['processed_transcript','alignment'].
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 <other> Any other named argument pairs are passed to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 the adaptor for processing.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 The adaptor argument must correspond to a module contained within the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 Bio::DB::GFF::Adaptor namespace. For example, the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 Bio::DB::GFF::Adaptor::dbi::mysql adaptor is loaded by specifying
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 'dbi::mysql'. By Perl convention, the adaptor names are lower case
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 because they are loaded at run time.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504 The aggregator array may contain a list of aggregator names, a list of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 initialized aggregator objects, or a string in the form
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 "aggregator_name{subpart1,subpart2,subpart3/main_method}" (the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 /main_method part is optional). For example, if you wish to change
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 the components aggregated by the transcript aggregator, you could pass
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 it to the GFF constructor this way:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 my $transcript =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 Bio::DB::GFF::Aggregator::transcript->new(-sub_parts=>[qw(exon intron utr
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 polyA spliced_leader)]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 my $db = Bio::DB::GFF->new(-aggregator=>[$transcript,'clone','alignment'],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 -adaptor => 'dbi::mysql',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 -dsn => 'dbi:mysql:elegans42');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 Alternatively, you could create an entirely new transcript aggregator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 this way:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522 my $new_agg = 'transcript{exon,intron,utr,polyA,spliced_leader}';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 my $db = Bio::DB::GFF->new(-aggregator=>[$new_agg,'clone','alignment'],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 -adaptor => 'dbi::mysql',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 -dsn => 'dbi:mysql:elegans42');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 See L<Bio::DB::GFF::Aggregator> for more details.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 The commonly used 'dbi::mysql' adaptor recognizes the following
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 adaptor-specific arguments:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 Argument Description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 -------- -----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 -dsn the DBI data source, e.g. 'dbi:mysql:ens0040'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536 If a partial name is given, such as "ens0040", the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 "dbi:mysql:" prefix will be added automatically.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 -user username for authentication
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 -pass the password for authentication
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543 -refclass landmark Class; defaults to "Sequence"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 The commonly used 'dbi::mysqlopt' adaptor also recognizes the following
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 arguments.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 Argument Description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 -------- -----------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 -fasta path to a directory containing FASTA files for the DNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 contained in this database (e.g. "/usr/local/share/fasta")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 -acedb an acedb URL to use when converting features into ACEDB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 objects (e.g. sace://localhost:2005)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Constructor: load the requested adaptor class, instantiate it, and attach
# the aggregators.
#
# Arguments:
#   * exactly one argument: taken as the DSN; the adaptor defaults to
#     'dbi::mysqlopt'.
#   * otherwise named arguments, unpacked by rearrange():
#       -adaptor / -factory         adaptor name (default 'dbi::mysqlopt')
#       -aggregator / -aggregators  a single aggregator or an array ref of them
#       -refclass                   passed to default_class() when defined
#     whatever rearrange() returns in the final slot is handed to the
#     adaptor's own new().
#
# Returns the object built by the adaptor class, not an instance of
# $package itself.
#
# Throws (via $package->throw) if the adaptor module cannot be loaded.
sub new {
  my $package = shift;
  my ($adaptor,$aggregators,$args,$refclass);

  if (@_ == 1) { # special case, default to dbi::mysqlopt
    $adaptor = 'dbi::mysqlopt';
    $args    = {DSN => shift};
  } else {
    ($adaptor,$aggregators,$refclass,$args) = rearrange([
							 [qw(ADAPTOR FACTORY)],
							 [qw(AGGREGATOR AGGREGATORS)],
							 'REFCLASS',
							],@_);
  }

  $adaptor ||= 'dbi::mysqlopt';

  # Adaptor names are lower-cased before being appended to the namespace.
  # NOTE(review): the name is interpolated into a string eval below, so it
  # must never come from untrusted input.
  my $class = "Bio::DB::GFF::Adaptor::\L${adaptor}\E";

  # Only load the module when the class is not already available, and judge
  # success by the eval's own return value.  Checking $@ unconditionally
  # would pick up a stale error left behind by an earlier, unrelated eval
  # whenever the load step is skipped.
  unless ($class->can('new')) {
    eval "require $class; 1"
      or $package->throw("Unable to load $adaptor adaptor: $@");
  }

  my $self = $class->new($args);
  $self->default_class($refclass) if defined $refclass;

  # handle the aggregators.
  # aggregators are responsible for creating complex multi-part features
  # from the GFF "group" field.  If none are provided, then we provide a
  # list of the two used in WormBase.
  # Each aggregator can be a scalar or a ref.  In the former case
  # it is treated as a class name to call new() on.  In the latter
  # the aggregator is treated as a ready made object.
  $aggregators = $self->default_aggregators unless defined $aggregators;
  my @aggregator_list = ref($aggregators) eq 'ARRAY' ? @$aggregators
                                                     : $aggregators;
  for my $aggregator (@aggregator_list) {
    $self->add_aggregator($aggregator);
  }

  # default settings go here.....
  $self->automerge(1);  # set automerge to true

  return $self;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 =head2 types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606 Title : types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607 Usage : $db->types(@args)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608 Function: return list of feature types in range or database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609 Returns : a list of Bio::DB::GFF::Typename objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610 Args : see below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613 This routine returns a list of feature types known to the database.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614 The list can be database-wide or restricted to a region. It is also
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615 possible to find out how many times each feature occurs.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617 For range queries, it is usually more convenient to create a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 Bio::DB::GFF::Segment object, and then invoke its types() method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 Arguments are as follows:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622 -ref ID of reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 -class class of reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 -start start of segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625 -stop stop of segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 -enumerate if true, count the features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 The returned value will be a list of Bio::DB::GFF::Typename objects,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629 which if evaluated in a string context will return the feature type in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 "method:source" format. This object class also has method() and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631 source() methods for retrieving the like-named fields.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 If -enumerate is true, then the function returns a hash (not a hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634 reference) in which the keys are type names in "method:source" format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 and the values are the number of times each feature appears in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 database or segment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 The argument -end is a synonym for -stop, and -count is a synonym for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 -enumerate.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Unpack the named arguments, normalize any requested types through
# parse_types(), and delegate the actual lookup to get_types().
# The return value is whatever get_types() returns.
sub types {
  my $self = shift;

  # Accepted argument names, in the order their values are returned;
  # bracketed entries list synonyms for the same slot.
  my $arg_order = [
		   [qw(REF REFSEQ)],
		   'START',
		   [qw(STOP END)],
		   [qw(ENUMERATE COUNT)],
		   [qw(CLASS SEQCLASS)],
		   [qw(TYPE TYPES)],
		  ];
  my ($refseq,$start,$stop,$enumerate,$refclass,$types)
    = rearrange($arg_order,@_);

  if (defined $types) {
    $types = $self->parse_types($types);
  }

  # Note the argument order: class comes second for get_types().
  return $self->get_types($refseq,$refclass,$start,$stop,$enumerate,$types);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657 =head2 classes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 Title : classes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660 Usage : $db->classes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661 Function: return list of landmark classes in database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662 Returns : a list of classes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666 This routine returns the list of reference classes known to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667 database, or empty if classes are not used by the database. Classes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668 are distinct from types, being essentially qualifiers on the reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669 namespaces.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# This implementation reports no reference classes: it yields an empty
# list in list context and undef in scalar context.
sub classes {
  my ($self) = @_;
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 =head2 segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 Title : segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681 Usage : $db->segment(@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 Function: create a segment object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683 Returns : segment object(s)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 Args : numerous, see below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687 This method generates a segment object, which is a Perl object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688 subclassed from Bio::DB::GFF::Segment. The segment can be used to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689 find overlapping features and the raw DNA.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 When making the segment() call, you specify the ID of a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692 landmark (e.g. an accession number, a clone or contig), and a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693 positional range relative to the landmark. If no range is specified,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694 then the entire extent of the landmark is used to generate the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695 segment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697 You may also provide the ID of a "reference" sequence, which will set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698 the coordinate system and orientation used for all features contained
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 within the segment. The reference sequence can be changed later. If
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700 no reference sequence is provided, then the coordinate system is based
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 on the landmark.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 Arguments:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705 -name ID of the landmark sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707 -class Database object class for the landmark sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708 "Sequence" assumed if not specified. This is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709 irrelevant for databases which do not recognize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 object classes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712 -start Start of the segment relative to landmark. Positions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713 follow standard 1-based sequence rules. If not specified,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714 defaults to the beginning of the landmark.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716 -end Stop of the segment relative to the landmark. If not specified,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717 defaults to the end of the landmark.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 -stop Same as -end.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 -offset For those who prefer 0-based indexing, the offset specifies the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722 position of the new segment relative to the start of the landmark.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724 -length For those who prefer 0-based indexing, the length specifies the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725 length of the new segment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 -refseq Specifies the ID of the reference landmark used to establish the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 coordinate system for the newly-created segment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730 -refclass Specifies the class of the reference landmark, for those databases
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731 that distinguish different object classes. Defaults to "Sequence".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733 -absolute
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734 Return features in absolute coordinates rather than relative to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735 parent segment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737 -nocheck Don't check the database for the coordinates and length of this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738 feature. Construct a segment using the indicated name as the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739 reference, a start coordinate of 1, an undefined end coordinate,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740 and a strand of +1.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742 -force Same as -nocheck.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744 -seq,-sequence,-sourceseq Aliases for -name.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746 -begin,-end Aliases for -start and -stop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748 -off,-len Aliases for -offset and -length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750 -seqclass Alias for -class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752 Here's an example to explain how this works:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754 my $db = Bio::DB::GFF->new(-dsn => 'dbi:mysql:human',-adaptor=>'dbi::mysql');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756 If successful, $db will now hold the database accessor object. We now
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757 try to fetch the fragment of sequence whose ID is A0000182 and class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 is "Accession."
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 my $segment = $db->segment(-name=>'A0000182',-class=>'Accession');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 If successful, $segment now holds the entire segment corresponding to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763 this accession number. By default, the sequence is used as its own
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 reference sequence, so its first base will be 1 and its last base will
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765 be the length of the accession.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767 Assuming that this sequence belongs to a longer stretch of DNA, say a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768 contig, we can fetch this information like so:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770 my $sourceseq = $segment->sourceseq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772 and find the start and stop on the source like this:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 my $start = $segment->abs_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775 my $stop = $segment->abs_stop;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777 If we had another segment, say $s2, which is on the same contiguous
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778 piece of DNA, we can pass that to the refseq() method in order to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779 establish it as the coordinate reference point:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781 $segment->refseq($s2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 Now calling start() will return the start of the segment relative to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784 the beginning of $s2, accounting for differences in strandedness:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786 my $rel_start = $segment->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788 IMPORTANT NOTE: This method can be used to return the segment spanned
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 by an arbitrary named annotation. However, if the annotation appears
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790 at multiple locations on the genome, for example an EST that maps to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791 multiple locations, then, provided that all locations reside on the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 same physical segment, the method will return a segment that spans the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793 minimum and maximum positions. If the reference sequence occupies
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 ranges on different physical segments, then it returns them all in an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795 array context, and raises a "multiple segment exception" in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796 a scalar context.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800 #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Build the segment object(s) for a landmark.
#
# The caller's arguments (positional or named) are normalised by
# setup_segment_args() and passed, together with this object as -factory,
# to Bio::DB::GFF::RelSegment->new.  Every segment that comes back is
# switched to absolute coordinates when $self->absolute is true.  The
# final result is shaped by _multiple_return_args(), which sees the
# context this method was called in.
sub segment {
  my $self = shift;

  my @ctor_args = $self->setup_segment_args(@_);
  my @found     = Bio::DB::GFF::RelSegment->new(-factory => $self, @ctor_args);

  for my $seg (@found) {
    next unless $self->absolute;
    $seg->absolute(1);
  }

  return $self->_multiple_return_args(@found);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Shape a result list according to its size and the calling context:
#
#   no items      -> bare return
#   exactly one   -> that item
#   several       -> the whole list in list context; in scalar context
#                    an error is recorded through $self->error and a
#                    'multiple segment exception' is thrown
sub _multiple_return_args {
  my ($self, @results) = @_;

  return             unless @results;
  return $results[0] if @results == 1;
  return @results    if wantarray;    # more than one reference sequence

  # Scalar context cannot carry several results.
  $self->error($results[0]->name,
               " has more than one reference sequence in database. Please call in a list context to retrieve them all.");
  $self->throw('multiple segment exception');
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Backward compatibility only -- do not use!
# (Deliberately left undocumented as well.)
# Normalises the arguments with setup_segment_args() and forwards them to
# segment() with -absolute => 1 appended.
sub abs_segment {
  my $self = shift;
  my @normalised = $self->setup_segment_args(@_);
  return $self->segment(@normalised, -absolute => 1);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Translate the positional calling conventions of segment() into named
# arguments.
#
#   (-name => ..., ...)       already named; returned unchanged
#   ($name, $start, $stop)    -> (-name, -start, -stop)
#   ($class, $name)           -> (-class, -name)
#   ($name)                   -> (-name)
#
# Any other shape (no arguments, or more than three positional ones)
# produces an empty list.
sub setup_segment_args {
  my $self = shift;
  return @_ if defined $_[0] && $_[0] =~ /^-/;
  return (-name=>$_[0],-start=>$_[1],-stop=>$_[2]) if @_ == 3;
  return (-class=>$_[0],-name=>$_[1]) if @_ == 2;
  return (-name=>$_[0]) if @_ == 1;

  # Explicit empty return: falling off the end here would hand back the
  # false value of the last condition as a stray list element.
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
845
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
846 =head2 features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
847
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
848 Title : features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
849 Usage : $db->features(@args)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
850 Function: get all features, possibly filtered by type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
851 Returns : a list of Bio::DB::GFF::Feature objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
852 Args : see below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
853 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855 This routine will retrieve features in the database regardless of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856 position. It can be used to return all features, or a subset based on
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
857 their method and source.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
858
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859 Arguments are as follows:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861 -types List of feature types to return. Argument is an array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862 reference containing strings of the format "method:source"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864 -merge Whether to apply aggregators to the generated features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
866 -rare Turn on optimizations suitable for a relatively rare feature type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
867 where it makes more sense to filter by feature type first,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
868 and then by position.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
869
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
870 -attributes A hash reference containing attributes to match.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
871
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
872 -iterator Whether to return an iterator across the features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
873
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
874 -binsize A true value will create a set of artificial features whose
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
875 start and stop positions indicate bins of the given size, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
876 whose scores are the number of features in the bin. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
877 class and method of the feature will be set to "bin",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
878 its source to "method:source", and its group to "bin:method:source".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
879 This is a handy way of generating histograms of feature density.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
880
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
881 If -iterator is true, then the method returns a single scalar value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
882 consisting of a Bio::SeqIO object. You can call next_seq() repeatedly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
883 on this object to fetch each of the features in turn. If iterator is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
884 false or absent, then all the features are returned as a list.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
885
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
886 Currently aggregation is disabled when iterating over a series of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
887 features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
888
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
889 Types are indicated using the nomenclature "method:source". Either of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
890 these fields can be omitted, in which case a wildcard is used for the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
891 missing field. Type names without the colon (e.g. "exon") are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
892 interpreted as the method name and a source wild card. Regular
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
893 expressions are allowed in either field, as in: "similarity:BLAST.*".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
894
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
895 The -attributes argument is a hashref containing one or more attributes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
896 to match against:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
897
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
898 -attributes => { Gene => 'abc-1',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
899 Note => 'confirmed' }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
900
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
901 Attribute matching is simple string matching, and multiple attributes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
902 are ANDed together.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
903
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
904 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
905
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Retrieve features regardless of position, optionally filtered by type.
#
# Accepts either a plain list of type strings or named arguments
# (-type/-types, -merge/-automerge, -rare/-sparse, -iterator); the named
# form is parsed with rearrange(), and the trailing value it returns is
# merged into the options as a hash reference.  The query is delegated to
# $self->_features with a search spec of rangetype 'contains', and its
# result is returned as-is.
sub features {
  my $self = shift;
  my ($types,$automerge,$sparse,$iterator,$other);

  my $named_call = defined $_[0] && $_[0] =~ /^-/;
  if ($named_call) {
    my @spec = ([qw(TYPE TYPES)],
                [qw(MERGE AUTOMERGE)],
                [qw(RARE SPARSE)],
                'ITERATOR');
    ($types,$automerge,$sparse,$iterator,$other) = rearrange(\@spec,@_);
  }
  else {
    # Positional call: every argument is a feature type.
    $types = \@_;
  }

  # for whole database retrievals, we probably don't want to automerge!
  # (when the caller did not say, the object-wide automerge setting is used)
  $automerge = $self->automerge unless defined $automerge;
  $other ||= {};

  my %search  = (rangetype => 'contains',
                 types     => $types);
  my %options = (sparse    => $sparse,
                 automerge => $automerge,
                 iterator  => $iterator,
                 %$other);    # leftover named arguments override the above

  return $self->_features(\%search,\%options);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
935
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
936 =head2 get_seq_stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
937
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
938 Title : get_seq_stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
939 Usage : my $seqio = $self->get_seq_stream(@args)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
940 Function: Performs a query and returns an iterator over it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
941 Returns : a Bio::SeqIO stream capable of producing sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
942 Args : As in features()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
943 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
944
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
945 This routine takes the same arguments as features(), but returns a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
946 Bio::SeqIO::Stream-compliant object. Use it like this:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
947
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
948 $stream = $db->get_seq_stream('exon');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
949 while (my $exon = $stream->next_seq) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
950 print $exon,"\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
951 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
952
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
953 NOTE: This is also called get_feature_stream(), since that's what it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
954 really does.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
955
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
956 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
957
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Same query as features(), but always with -iterator => 1 added.
#
# A call whose first argument is undefined or starts with '-' is treated
# as already using named arguments; otherwise the arguments are taken to
# be a list of types and are passed as -types.
sub get_seq_stream {
  my $self = shift;

  my @args;
  if (!defined($_[0]) || $_[0] =~ /^-/) {
    @args = (@_, -iterator => 1);
  }
  else {
    @args = (-types => \@_, -iterator => 1);
  }

  return $self->features(@args);
}

*get_feature_stream = \&get_seq_stream;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
966
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
967 =head2 get_feature_by_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
968
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
969 Title : get_feature_by_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
970 Usage : $db->get_feature_by_name($class => $name)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
971 Function: fetch features by their name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
972 Returns : a list of Bio::DB::GFF::Feature objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
973 Args : the class and name of the desired feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
974 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
975
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
976 This method can be used to fetch a named feature from the database.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
977 GFF annotations are named using the group class and name fields, so
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
978 for features that belong to a group of size one, this method can be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
979 used to retrieve that group (and is equivalent to the segment()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
980 method). Any Alias attributes are also searched for matching names.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
981
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
982 An alternative syntax allows you to search for features by name within
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
983 a circumscribed region:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
984
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
985 @f = $db->get_feature_by_name(-class => $class,-name=>$name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
986 -ref => $sequence_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
987 -start => $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
988 -end => $end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
989
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
990 This method may return zero, one, or several Bio::DB::GFF::Feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
991 objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
992
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
993 Aggregation is performed on features as usual.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
994
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
995 NOTE: At various times, this function was called fetch_group(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
996 fetch_feature(), fetch_feature_by_name() and segments(). These names
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
997 are preserved for backward compatibility.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
998
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
999 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1000
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Fetch features by group class and name, optionally within a region.
#
# Call styles:
#   get_feature_by_name($name)
#       the class comes from $self->default_class
#   get_feature_by_name(-class => ..., -name => ..., -automerge => ...,
#                       -ref (or -refseq) => ..., -start => ...,
#                       -stop (or -end) => ...)
#       parsed with rearrange(); a false class falls back to default_class
#
# When automerge is on (explicitly, or through $self->automerge), every
# aggregator whose disaggregate() returns true is later given the fetched
# features to aggregate.  Returns the features as a list.
sub get_feature_by_name {
  my $self = shift;
  my ($gclass,$gname,$automerge,$ref,$start,$end);
  if (@_ == 1) {
    $gclass = $self->default_class;
    $gname  = shift;
  } else {
    ($gclass,$gname,$automerge,$ref,$start,$end) = rearrange(['CLASS','NAME','AUTOMERGE',
                                                              ['REF','REFSEQ'],
                                                              'START',['STOP','END']
                                                             ],@_);
    $gclass ||= $self->default_class;
  }
  $automerge = $self->automerge unless defined $automerge;

  # we need to refactor this...  It's repeated code (see below)...
  my @aggregators;
  if ($automerge) {
    # The loop variable is deliberately not called $a, so it does not mask
    # the sort variable.
    for my $aggregator ($self->aggregators) {
      push @aggregators,$aggregator if $aggregator->disaggregate([],$self);
    }
  }

  my %groups;         # cache the groups we create to avoid consuming too much unnecessary memory
  my $features = [];
  my $callback = sub { push @$features,$self->make_feature(undef,\%groups,@_) };

  # Use a ternary rather than "my ... if ...": a my() with a statement
  # modifier has undefined behaviour.  Without a reference sequence the
  # location is simply undef.
  my $location = defined $ref ? [$ref,$start,$end] : undef;
  $self->_feature_by_name($gclass,$gname,$location,$callback);

  warn "aggregating...\n" if $self->debug;
  # NOTE(review): the original comment here read "last aggregator gets
  # first shot", yet the list is walked in forward order -- confirm which
  # order is intended.
  foreach my $aggregator (@aggregators) {
    $aggregator->aggregate($features,$self);
  }

  return @$features;
}

# horrible indecision regarding proper names!
*fetch_group = *fetch_feature = *fetch_feature_by_name = \&get_feature_by_name;
*segments = \&segment;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1041
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1042 =head2 get_feature_by_target
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1043
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1044 Title : get_feature_by_target
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1045 Usage : $db->get_feature_by_target($class => $name)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1046 Function: fetch features by their similarity target
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1047 Returns : a list of Bio::DB::GFF::Feature objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1048 Args : the class and name of the desired feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1049 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1050
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1051 This method can be used to fetch a named feature from the database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1052 based on its similarity hit.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1053
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1054 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1055
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Thin wrapper: hands all arguments to get_feature_by_name() unchanged and
# returns whatever that method returns.
sub get_feature_by_target {
  my $self = shift;
  return $self->get_feature_by_name(@_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1059
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1060 =head2 get_feature_by_attribute
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1061
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1062 Title : get_feature_by_attribute
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1063 Usage : $db->get_feature_by_attribute(attribute1=>value1,attribute2=>value2)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1064 Function: fetch features by combinations of attribute values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1065 Returns : a list of Bio::DB::GFF::Feature objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1066 Args : a list (or hash reference) of attribute name=>value pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1067 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1068
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1069 This method can be used to fetch a set of features from the database.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1070 Attributes are a list of name=E<gt>value pairs. They will be logically
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1071 ANDED together.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1072
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1073 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1074
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1075 sub get_feature_by_attribute {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1076 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1077 my %attributes = ref($_[0]) ? %{$_[0]} : @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1078
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1079 # we need to refactor this... It's repeated code (see above)...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1080 my @aggregators;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1081 if ($self->automerge) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1082 for my $a ($self->aggregators) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1083 unshift @aggregators,$a if $a->disaggregate([],$self);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1084 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1085 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1086
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1087 my %groups; # cache the groups we create to avoid consuming too much unnecessary memory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1088 my $features = [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1089 my $callback = sub { push @$features,$self->make_feature(undef,\%groups,@_) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1090 $self->_feature_by_attribute(\%attributes,$callback);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1091
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1092 warn "aggregating...\n" if $self->debug;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1093 foreach my $a (@aggregators) { # last aggregator gets first shot
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1094 $a->aggregate($features,$self) or next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1095 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1096
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1097 @$features;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1098 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1099
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1100 # more indecision...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1101 *fetch_feature_by_attribute = \&get_feature_by_attribute;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1103 =head2 get_feature_by_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1104
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1105 Title : get_feature_by_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1106 Usage : $db->get_feature_by_id($id)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1107 Function: fetch features by feature ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1108 Returns : a Bio::DB::GFF::Feature object (a list of them in list context)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1109 Args : the feature ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1110 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1112 This method can be used to fetch a feature from the database using its
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1113 ID. Not all GFF databases support IDs, so be careful with this.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1114
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1115 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1117 sub get_feature_by_id {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1118 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1119 my $id = ref($_[0]) eq 'ARRAY' ? $_[0] : \@_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1120 my %groups; # cache the groups we create to avoid consuming too much unnecessary memory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1121 my $features = [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1122 my $callback = sub { push @$features,$self->make_feature(undef,\%groups,@_) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1123 $self->_feature_by_id($id,'feature',$callback);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1124 return wantarray ? @$features : $features->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1125 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1126 *fetch_feature_by_id = \&get_feature_by_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1128 =head2 get_feature_by_gid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1130 Title : get_feature_by_gid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1131 Usage : $db->get_feature_by_gid($id)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1132 Function: fetch features by group ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1133 Returns : a Bio::DB::GFF::Feature object (a list of them in list context)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1134 Args : the group ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1135 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1136
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1137 This method can be used to fetch a feature from the database using its
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1138 group ID. Not all GFF databases support IDs, so be careful with this.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1140 The group ID is often more interesting than the feature ID, since
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1141 groups can be complex objects containing subobjects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1143 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1145 sub get_feature_by_gid {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1146 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1147 my $id = ref($_[0]) eq 'ARRAY' ? $_[0] : \@_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1148 my %groups; # cache the groups we create to avoid consuming too much unnecessary memory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1149 my $features = [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1150 my $callback = sub { push @$features,$self->make_feature(undef,\%groups,@_) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1151 $self->_feature_by_id($id,'group',$callback);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1152 return wantarray ? @$features : $features->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1153 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1154 *fetch_feature_by_gid = \&get_feature_by_gid;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1155
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1156 =head2 delete_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1158 Title : delete_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1159 Usage : $db->delete_features(@ids_or_features)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1160 Function: delete one or more features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1161 Returns : count of features deleted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1162 Args : list of features or feature ids
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1163 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1165 Pass this method a list of numeric feature ids or a set of features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1166 It will attempt to remove the features from the database and return a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1167 count of the features removed.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1169 NOTE: This method is also called delete_feature(). Also see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1170 delete_groups().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1172 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1174 *delete_feature = \&delete_features;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1176 sub delete_features {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1177 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1178 my @features_or_ids = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1179 my @ids = map {UNIVERSAL::isa($_,'Bio::DB::GFF::Feature') ? $_->id : $_} @features_or_ids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1180 return unless @ids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1181 $self->_delete_features(@ids);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1182 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1183
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1184 =head2 delete_groups
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1186 Title : delete_groups
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1187 Usage : $db->delete_groups(@ids_or_features)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1188 Function: delete one or more feature groups
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1189 Returns : count of features deleted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1190 Args : list of features or feature group ids
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1191 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1192
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1193 Pass this method a list of numeric group ids or a set of features. It
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1194 will attempt to recursively remove the features and ALL members of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1195 their group from the database. It returns a count of the number of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1196 features (not groups) removed.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1198 NOTE: This method is also called delete_group(). Also see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1199 delete_features().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1201 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1203 *delete_group = \&delete_groups; # singular alias for delete_groups(); previously pointed at the misspelled, undefined sub "delete_groupss"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1204
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1205 sub delete_groups {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1206 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1207 my @features_or_ids = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1208 my @ids = map {UNIVERSAL::isa($_,'Bio::DB::GFF::Feature') ? $_->group_id : $_} @features_or_ids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1209 return unless @ids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1210 $self->_delete_groups(@ids);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1211 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1212
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1213 =head2 delete
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1214
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1215 Title : delete
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1216 Usage : $db->delete(@args)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1217 Function: delete features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1218 Returns : count of features deleted -- if available
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1219 Args : numerous, see below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1220 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1222 This method deletes all features that overlap the specified region or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1223 are of a particular type. If no arguments are provided and the -force
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1224 argument is true, then deletes ALL features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1226 Arguments:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1228 -name ID of the landmark sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1229
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1230 -ref ID of the landmark sequence (synonym for -name).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1231
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1232 -class Database object class for the landmark sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1233 "Sequence" assumed if not specified. This is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1234 irrelevant for databases which do not recognize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1235 object classes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1237 -start Start of the segment relative to landmark. Positions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1238 follow standard 1-based sequence rules. If not specified,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1239 defaults to the beginning of the landmark.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1241 -end Stop of the segment relative to the landmark. If not specified,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1242 defaults to the end of the landmark.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1243
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1244 -offset Zero-based addressing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1246 -length Length of region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1247
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1248 -type,-types Either a single scalar type to be deleted, or an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1249 reference to an array of types.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1250
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1251 -force Force operation to be performed even if it would delete
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1252 entire feature table.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1253
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1254 -range_type Control the range type of the deletion. One of "overlaps" (default)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1255 "contains" or "contained_in"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1256
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1257 Examples:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1258
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1259 $db->delete(-type=>['intron','repeat:repeatMasker']); # remove all introns & repeats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1260 $db->delete(-name=>'chr3',-start=>1,-end=>1000); # remove annotations on chr3 from 1 to 1000
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1261 $db->delete(-name=>'chr3',-type=>'exon'); # remove all exons on chr3
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1262
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1263 The short form of this call, as described in segment() is also allowed:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1264
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1265 $db->delete("chr3",1=>1000);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1266 $db->delete("chr3");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1268 IMPORTANT NOTE: This method only deletes features. It does *NOT*
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1269 delete the names of groups that contain the deleted features. Group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1270 IDs will be reused if you later load a feature with the same group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1271 name as one that was previously deleted.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1273 NOTE ON FEATURE COUNTS: The DBI-based versions of this call return the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1274 result code from the SQL DELETE operation. Some dbd drivers return the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1275 count of rows deleted, while others return 0E0. Caveat emptor.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1276
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1277 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1278
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1279 sub delete {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1280 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1281 my @args = $self->setup_segment_args(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1282 my ($name,$class,$start,$end,$offset,$length,$type,$force,$range_type) =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1283 rearrange([['NAME','REF'],'CLASS','START',[qw(END STOP)],'OFFSET',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1284 'LENGTH',[qw(TYPE TYPES)],'FORCE','RANGE_TYPE'],@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1285 $offset = 0 unless defined $offset;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1286 $start = $offset+1 unless defined $start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1287 $end = $start+$length-1 if !defined $end and $length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1288 $class ||= $self->default_class;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1289
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1290 my $types = $self->parse_types($type); # parse out list of types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1292 $range_type ||= 'overlaps';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1293 $self->throw("range type must be one of {".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1294 join(',',keys %valid_range_types).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1295 "}\n")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1296 unless $valid_range_types{lc $range_type};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1299 my @segments;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1300 if (defined $name && $name ne '') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1301 my @args = (-name=>$name,-class=>$class);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1302 push @args,(-start=>$start) if defined $start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1303 push @args,(-end =>$end) if defined $end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1304 @segments = $self->segment(@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1305 return unless @segments;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1306 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1307 $self->_delete({segments => \@segments,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1308 types => $types,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1309 range_type => $range_type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1310 force => $force}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1311 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1312 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1314 =head2 absolute
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1315
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1316 Title : absolute
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1317 Usage : $abs = $db->absolute([$abs]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1318 Function: gets/sets absolute mode
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1319 Returns : current setting of absolute mode boolean
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1320 Args : new setting for absolute mode boolean
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1321 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1323 $db-E<gt>absolute(1) will turn on absolute mode for the entire database.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1324 All segments retrieved will use absolute coordinates by default,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1325 rather than relative coordinates. You can still set them to use
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1326 relative coordinates by calling $segment-E<gt>absolute(0).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1327
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1328 Note that this is not the same as calling abs_segment(); it continues
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1329 to allow you to look up groups that are not used directly as reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1330 sequences.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1331
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1332 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1333
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1334 sub absolute {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1335 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1336 my $d = $self->{absolute};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1337 $self->{absolute} = shift if @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1338 $d;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1339 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1341 =head2 strict_bounds_checking
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1342
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1343 Title : strict_bounds_checking
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1344 Usage : $flag = $db->strict_bounds_checking([$flag])
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1345 Function: gets/sets strict bounds checking
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1346 Returns : current setting of bounds checking flag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1347 Args : new setting for bounds checking flag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1348 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1349
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1350 This flag enables extra checks for segment requests that go beyond the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1351 ends of their reference sequences. If bounds checking is enabled,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1352 then retrieved segments will be truncated to their physical length,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1353 and their truncated() methods will return true.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1355 If the flag is off (the default), then the module will return segments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1356 that appear to extend beyond their physical boundaries. Requests for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1357 features beyond the end of the segment will, however, return empty.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1359 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1360
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1361 sub strict_bounds_checking {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1362 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1363 my $d = $self->{strict};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1364 $self->{strict} = shift if @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1365 $d;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1366 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1367
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1368 =head2 get_Seq_by_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1369
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1370 Title : get_Seq_by_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1371 Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1372 Function: Gets a Bio::Seq object by its name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1373 Returns : a Bio::Seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1374 Args : the id (as a string) of a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1375 Throws : "id does not exist" exception
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1377 NOTE: Bio::DB::RandomAccessI compliant method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1378
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1379 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1380
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1381 sub get_Seq_by_id {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1382 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1383 my $id = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1384 my $stream = $self->get_Stream_by_id($id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1385 return $stream->next_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1386 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1388
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1389 =head2 get_Seq_by_accession
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1390
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1391 Title : get_Seq_by_accession
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1392 Usage : $seq = $db->get_Seq_by_accession('AL12234')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1393 Function: Gets a Bio::Seq object by its accession
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1394 Returns : a Bio::Seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1395 Args : the accession (as a string) of a sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1396 Throws : "id does not exist" exception
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1397
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1398 NOTE: Bio::DB::RandomAccessI compliant method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1399
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1400 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1401
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1402 sub get_Seq_by_accession {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1403 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1404 my $id = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1405 my $stream = $self->get_Stream_by_accession($id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1406 return $stream->next_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1407 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1408
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1409 =head2 get_Stream_by_acc ()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1410
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1411 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1412
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1413 =head2 get_Seq_by_acc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1415 Title : get_Seq_by_acc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1416 Usage : $seq = $db->get_Seq_by_acc('X77802');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1417 Function: Gets a Bio::Seq object by accession number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1418 Returns : A Bio::Seq object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1419 Args : accession number (as a string)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1420 Throws : "acc does not exist" exception
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1421
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1422 NOTE: Bio::DB::RandomAccessI compliant method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1423
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1424 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1425
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Build an iterator over the features that match the given names.
#
# Args    : either a list of names, or a single reference holding them
#           (a leading reference is passed through as-is)
# Returns : a Bio::DB::GFF::ID_Iterator created in 'name' mode
sub get_Stream_by_name {
  my ($self, @names) = @_;

  # Accept both calling styles: a flat list is wrapped in an array
  # reference, a caller-supplied reference is used directly.
  my $wanted = \@names;
  $wanted = $names[0] if ref $names[0];

  return Bio::DB::GFF::ID_Iterator->new($self, $wanted, 'name');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1432
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1433 =head2 get_Stream_by_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1435 Title : get_Stream_by_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1436 Usage : $stream = $db->get_Stream_by_id(@ids);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1437 Function: Retrieves a stream of Seq objects given their ids
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1438 Returns : a Bio::SeqIO stream object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1439 Args : an array of unique ids/accession numbers, or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1440 an array reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1441
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1442 NOTE: This is also called get_Stream_by_batch()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1443
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1444 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1445
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Build an iterator over the features that match the given feature ids.
#
# Args    : either a list of ids, or a single reference holding them
#           (a leading reference is passed through as-is)
# Returns : a Bio::DB::GFF::ID_Iterator created in 'feature' mode
sub get_Stream_by_id {
  my ($self, @feature_ids) = @_;

  # A flat list is wrapped in an array reference; a reference supplied by
  # the caller is used directly.
  my $wanted = \@feature_ids;
  $wanted = $feature_ids[0] if ref $feature_ids[0];

  return Bio::DB::GFF::ID_Iterator->new($self, $wanted, 'feature');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1452
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1453 =head2 get_Stream_by_batch ()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1454
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1455 Title : get_Stream_by_batch
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1456 Usage : $stream = $db->get_Stream_by_batch(@ids);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1457 Function: Retrieves a stream of Seq objects given their ids
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1458 Returns : a Bio::SeqIO stream object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1459 Args : an array of unique ids/accession numbers, or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1460 an array reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1461
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1462 NOTE: This is the same as get_Stream_by_id().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1463
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1464 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1465
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Install get_Stream_by_batch() as a second name for get_Stream_by_id().
1466 *get_Stream_by_batch = \&get_Stream_by_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1467
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1468
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1469 =head2 get_Stream_by_group ()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1470
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1471 Bioperl compatibility.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1472
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1473 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1474
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Build an iterator over the features that belong to the given groups.
#
# Args    : either a list of group ids, or a single reference holding them
#           (a leading reference is passed through as-is)
# Returns : a Bio::DB::GFF::ID_Iterator created in 'group' mode
sub get_Stream_by_group {
  my ($self, @group_ids) = @_;

  # A flat list is wrapped in an array reference; a reference supplied by
  # the caller is used directly.
  my $wanted = \@group_ids;
  $wanted = $group_ids[0] if ref $group_ids[0];

  return Bio::DB::GFF::ID_Iterator->new($self, $wanted, 'group');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1481
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1482 =head2 all_seqfeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1483
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1484 Title : all_seqfeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1485 Usage : @features = $db->all_seqfeatures(@args)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1486 Function: fetch all the features in the database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1487 Returns : an array of features, or an iterator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1488 Args : See below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1489 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1490
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1491 This is equivalent to calling $db-E<gt>features() without any types, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1492 will return all the features in the database. The -merge and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1493 -iterator arguments are recognized, and behave the same as described
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1494 for features().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1495
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1496 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Fetch every feature in the database by delegating to features() with no
# type filter.
#
# Args    : named arguments; only -merge (alias -automerge) and -iterator
#           are picked out and forwarded
# Returns : whatever features() returns for the forwarded arguments
sub all_seqfeatures {
  my ($self, @params) = @_;

  my ($automerge, $iterator) =
    rearrange([ [qw(MERGE AUTOMERGE)], 'ITERATOR' ], @params);

  # Forward an option only when the caller actually supplied it, so that
  # features() keeps its own defaults otherwise.
  my @feature_args = (
    (defined $automerge ? (-merge    => $automerge) : ()),
    (defined $iterator  ? (-iterator => $iterator)  : ()),
  );

  return $self->features(@feature_args);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1509
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1510 =head1 Creating and Loading GFF Databases
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1511
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1512 =head2 initialize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1513
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1514 Title : initialize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1515 Usage : $db->initialize(-erase=>$erase,-option1=>value1,-option2=>value2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1516 Function: initialize a GFF database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1517 Returns : true if initialization successful
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1518 Args : a set of named parameters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1519 Status : Public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1520
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1521 This method can be used to initialize an empty database. It takes the following
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1522 named arguments:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1523
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1524 -erase A boolean value. If true the database will be wiped clean if it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1525 already contains data.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1526
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1527 Other named arguments may be recognized by subclasses. They become database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1528 meta values that control various settable options.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1529
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1530 As a shortcut (and for backward compatibility) a single true argument
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1531 is the same as initialize(-erase=E<gt>1).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1532
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1533 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1534
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Initialize (and optionally erase) the database, then store its meta
# values.
#
# Args    : named arguments; -erase is extracted, anything rearrange() hands
#           back as the second value is used as a hash of meta settings
# Returns : 1 on success; nothing if do_initialize() reports failure
#
# Historical note: an earlier version ran
#   $self->do_initialize(1) if @_ == 1 && $_[0];
# here.  It was disabled because it had no visible effect of its own; the
# documented single-true-argument shortcut is left to rearrange()
# (presumably it passes a lone positional argument through as $erase --
# TODO confirm against Bio::DB::GFF::Util::Rearrange).
sub initialize {
  my ($self, @params) = @_;

  my ($erase, $meta) = rearrange(['ERASE'], @params);
  $meta = {} unless $meta;

  # initialize (possibly erasing); give up quietly if that fails
  $self->do_initialize($erase) or return;

  # default_meta_values() yields name => value pairs.  Upper-case the names
  # (the even positions), which is necessary for case-insensitive SQL
  # databases.
  my @defaults = $self->default_meta_values;
  $defaults[$_] = uc $defaults[$_] for grep { $_ % 2 == 0 } 0 .. $#defaults;

  # Caller-supplied settings override the defaults.
  my %settings = (@defaults, %$meta);
  for my $name (keys %settings) {
    $self->meta($name => $settings{$name});
  }

  return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1561
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1562
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1563 =head2 load_gff
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1564
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1565 Title : load_gff
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1566 Usage : $db->load_gff($file|$directory|$filehandle);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1567 Function: load GFF data into database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1568 Returns : count of records loaded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1569 Args : a directory, a file, a list of files,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1570 or a filehandle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1571 Status : Public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1572
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1573 This method takes a single overloaded argument, which can be any of:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1574
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1575 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1576
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1577 =item 1. a scalar corresponding to a GFF file on the system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1578
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1579 A pathname to a local GFF file. Any files ending with the .gz, .Z, or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1580 .bz2 suffixes will be transparently decompressed with the appropriate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1581 command-line utility.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1582
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1583 =item 2. an array reference containing a list of GFF files on the system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1584
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1585 For example ['/home/gff/gff1.gz','/home/gff/gff2.gz']
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1586
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1587 =item 3. directory path
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1588
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1589 The indicated directory will be searched for all files ending in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1590 suffixes .gff, .gff.gz, .gff.Z or .gff.bz2.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1591
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1592 =item 4. filehandle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1593
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1594 An open filehandle from which to read the GFF data. Tied filehandles
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1595 now work as well.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1596
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1597 =item 5. a pipe expression
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1598
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1599 A pipe expression will also work. For example, a GFF file on a remote
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1600 web server can be loaded with an expression like this:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1601
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1602 $db->load_gff("lynx -dump -source http://stein.cshl.org/gff_test |");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1603
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1604 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1605
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1606 If successful, the method will return the number of GFF lines
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1607 successfully loaded.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1608
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1609 NOTE:this method used to be called load(), but has been changed. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1610 old method name is also recognized.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1611
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1612 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1613
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Load GFF data into the database.
#
# Args    : a file name, a directory, a pipe expression, an array reference
#           of file names, or a filehandle (defaults to '.')
# Returns : the result of do_load_gff(); nothing if setup_argv() finds no
#           input at all
#
# Tied filehandles are handed straight to do_load_gff().  Everything else is
# turned into an @ARGV list by setup_argv() and read through the magic ARGV
# handle; setup_argv() may re-point STDIN at the caller's handle, so STDIN
# is saved beforehand and restored afterwards.
sub load_gff {
  my $self = shift;
  my $file_or_directory = shift || '.';

  # Only glob-like references can be tied handles.  An array reference (a
  # list of file names) must not be dereferenced with *{...}: that dies
  # with "Not a GLOB reference".
  return $self->do_load_gff($file_or_directory)
    if ref($file_or_directory)
      && ref($file_or_directory) ne 'ARRAY'
      && tied *$file_or_directory;

  my $tied_stdin = tied(*STDIN);

  # Keep a private duplicate of STDIN in a lexical handle so that it is
  # released when we are done instead of lingering in a package global.
  my $saved_stdin;
  if (!$tied_stdin && !open($saved_stdin, '<&', \*STDIN)) {
    undef $saved_stdin;    # STDIN was not open; nothing to restore later
  }

  local @ARGV = $self->setup_argv($file_or_directory,'gff') or return;  # to play tricks with reader
  my $result = $self->do_load_gff('ARGV');

  if ($saved_stdin) {
    open(STDIN, '<&', $saved_stdin);    # restore STDIN (best effort)
    close $saved_stdin;
  }
  return $result;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Keep the old method name load() working as a second name for load_gff().
1628 *load = \&load_gff;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1629
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1630 =head2 load_fasta
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1631
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1632 Title : load_fasta
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1633 Usage : $db->load_fasta($file|$directory|$filehandle);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1634 Function: load FASTA data into database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1635 Returns : count of records loaded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1636 Args : a directory, a file, a list of files,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1637 or a filehandle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1638 Status : Public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1639
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1640 This method takes a single overloaded argument, which can be any of:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1641
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1642 =over 4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1643
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1644 =item 1. scalar corresponding to a FASTA file on the system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1645
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1646 A pathname to a local FASTA file. Any files ending with the .gz, .Z, or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1647 .bz2 suffixes will be transparently decompressed with the appropriate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1648 command-line utility.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1649
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1650 =item 2. array reference containing a list of FASTA files on the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1651 system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1652
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1653 For example ['/home/fasta/genomic1.fa.gz','/home/fasta/genomic2.fa.gz']
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1654
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1655 =item 3. path to a directory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1656
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1657 The indicated directory will be searched for all files ending in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1658 suffixes .fa, .fa.gz, .fa.Z or .fa.bz2.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1659
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1660 =item 4. filehandle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1661
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1662 An open filehandle from which to read the FASTA data.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1663
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1664 =item 5. pipe expression
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1665
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1666 A pipe expression will also work. For example, a FASTA file on a remote
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1667 web server can be loaded with an expression like this:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1668
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1669 $db->load_fasta("lynx -dump -source http://stein.cshl.org/fasta_test.fa |");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1670
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1671 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1672
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1673 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1674
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Load FASTA sequence data into the database.
#
# Args    : a file name, a directory, a pipe expression, an array reference
#           of file names, or a filehandle (defaults to '.')
# Returns : the result of load_sequence(); nothing if setup_argv() finds no
#           input at all
#
# Tied filehandles are handed straight to load_sequence().  Everything else
# is turned into an @ARGV list by setup_argv() (suffixes fa, dna, fasta) and
# read through the magic ARGV handle; setup_argv() may re-point STDIN at the
# caller's handle, so STDIN is saved beforehand and restored afterwards.
sub load_fasta {
  my $self = shift;
  my $file_or_directory = shift || '.';

  # Only glob-like references can be tied handles.  An array reference (a
  # list of file names) must not be dereferenced with *{...}: that dies
  # with "Not a GLOB reference".
  return $self->load_sequence($file_or_directory)
    if ref($file_or_directory)
      && ref($file_or_directory) ne 'ARRAY'
      && tied *$file_or_directory;

  my $tied = tied(*STDIN);

  # Keep a private duplicate of STDIN in a lexical handle so that it is
  # released when we are done instead of lingering in a package global.
  my $saved_stdin;
  if (!$tied && !open($saved_stdin, '<&', \*STDIN)) {
    undef $saved_stdin;    # STDIN was not open; nothing to restore later
  }

  local @ARGV = $self->setup_argv($file_or_directory,'fa','dna','fasta') or return;  # to play tricks with reader
  my $result = $self->load_sequence('ARGV');

  if ($saved_stdin) {
    open(STDIN, '<&', $saved_stdin);    # restore STDIN (best effort)
    close $saved_stdin;
  }
  return $result;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1689 =head2 load_sequence_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1690
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1691 Title : load_sequence_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1692 Usage : $db->load_sequence_string($id,$dna)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1693 Function: load a single DNA entry
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1694 Returns : true if successfully loaded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1695 Args : a sequence ID and a raw sequence string (DNA, RNA, protein)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1696 Status : Public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1697
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1698 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1699
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Store a single sequence given as an in-memory string.
#
# Args    : $acc - identifier to store the sequence under
#           $seq - the raw sequence string
# Returns : 1 when both insertion steps succeed; nothing as soon as either
#           insert_sequence_chunk() or insert_sequence() returns false
sub load_sequence_string {
  my ($self, $acc, $seq) = @_;
  my $offset = 0;

  # The chunk inserter receives references, so it can update both the
  # running offset and the sequence; the final call then gets the
  # resulting values.
  return unless $self->insert_sequence_chunk($acc, \$offset, \$seq);
  return unless $self->insert_sequence($acc, $offset, $seq);
  return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1708
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Turn a flexible "input source" argument into a list suitable for @ARGV.
#
# Args    : $file_or_directory - an array reference of file names, a
#                                directory, a filehandle, or a single file
#                                name / pipe expression
#           @suffixes          - file suffixes to look for when the source
#                                is a directory
# Returns : the list of @ARGV entries.  A filehandle is attached to STDIN
#           and represented as '-'.  Names ending in .gz, .Z or .bz2 are
#           rewritten into decompression pipe expressions.
# Throws  : via $self->throw() if the handle cannot be attached to STDIN
#
# NOTE(review): names are interpolated unquoted into shell pipelines, so
# names containing whitespace or shell metacharacters are not safe here.
sub setup_argv {
  my $self = shift;
  my $file_or_directory = shift;
  my @suffixes = @_;
  no strict 'refs';  # so that we can call fileno() on the argument

  my @argv;

  if (ref($file_or_directory) eq 'ARRAY') {
    # A list of file names.  This has to be recognized before fileno() is
    # tried, because fileno() on an array reference dies with "Not a GLOB
    # reference".
    @argv = @$file_or_directory;
  } elsif (-d $file_or_directory) {
    @argv = map { glob("$file_or_directory/*.{$_,$_.gz,$_.Z,$_.bz2}")} @suffixes;
  } else {
    my $fd = fileno($file_or_directory);

    # Descriptor 0 is a perfectly valid handle, so for real handles test
    # definedness rather than truth.  Plain strings keep the old rule
    # (descriptor must be non-zero) so that a file that merely happens to
    # be called "STDIN" is still treated as a file name.
    my $is_real_handle = ref($file_or_directory)
      || ref(\$file_or_directory) eq 'GLOB';

    if ($is_real_handle ? defined $fd : $fd) {
      # Re-opening STDIN onto its own descriptor would close that
      # descriptor first, so only attach when the handle is a different one.
      my $stdin_fd = fileno(STDIN);
      unless (defined $stdin_fd && $stdin_fd == $fd) {
        open STDIN,"<&=$fd" or $self->throw("Can't dup STDIN");
      }
      @argv = '-';
    } elsif (ref $file_or_directory) {
      @argv = @$file_or_directory;
    } else {
      @argv = $file_or_directory;
    }
  }

  # Compressed files are read through the matching command-line tool.
  foreach (@argv) {
    if (/\.gz$/) {
      $_ = "gunzip -c $_ |";
    } elsif (/\.Z$/) {
      $_ = "uncompress -c $_ |";
    } elsif (/\.bz2$/) {
      $_ = "bunzip2 -c $_ |";
    }
  }
  @argv;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1739
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1740 =head2 lock_on_load
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1741
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1742 Title : lock_on_load
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1743 Usage : $lock = $db->lock_on_load([$lock])
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1744 Function: set write locking during load
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1745 Returns : value of the lock-on-load flag before any new value is applied
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1746 Args : new value of lock-on-load-flag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1747 Status : Public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1748
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1749 This method is honored by some of the adaptors. If the value is true,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1750 the tables used by the GFF modules will be locked for writing during
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1751 loads and inaccessible to other processes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1752
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1753 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1754
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Get, and optionally set, the lock-on-load flag kept in $self->{lock}.
#
# Args    : optional new flag value
# Returns : the value the flag had before this call
sub lock_on_load {
  my ($self, @new_value) = @_;
  my $previous = $self->{lock};

  # Test the argument count rather than the value, so that an explicit
  # false or undef argument still overwrites the flag.
  if (@new_value) {
    $self->{lock} = $new_value[0];
  }
  return $previous;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1761
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1762 =head2 meta
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1763
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1764 Title : meta
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1765 Usage : $value = $db->meta($name [,$newval])
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1766 Function: get or set a meta variable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1767 Returns : a string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1768 Args : meta variable name and optionally value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1769 Status : abstract
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1770
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1771 Get or set a named metavalue for the database. Metavalues can be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1772 used for database-specific settings.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1773
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1774 By default, this method does nothing!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1775
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1776 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1777
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Placeholder accessor for database meta values.
#
# Args    : $name  - meta variable name
#           $value - optional new value
# Returns : nothing; this version ignores both arguments and stores nothing
sub meta {
  my ($self, $name, $value) = @_;
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1783
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1784 =head2 default_meta_values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1785
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1786 Title : default_meta_values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1787 Usage : %values = $db->default_meta_values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1788 Function: return the default meta values for the database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1789 Returns : a list of tag=>value pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1790 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1791 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1792
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1793 This method returns a list of tag=E<gt>value pairs that contain default
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1794 meta information about the database. It is invoked by initialize() to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1795 write out the default meta values. The base class version returns an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1796 empty list.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1797
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1798 For things to work properly, meta value names must be UPPERCASE.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1799
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1800 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1801
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Supply the default meta values as a list of name => value pairs.
#
# Args    : none
# Returns : an empty list in this version
sub default_meta_values {
  my ($self) = @_;
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1806
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1807
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1808 =head2 error
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1809
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1810 Title : error
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1811 Usage : $db->error( [$new error] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1812 Function: read or set error message
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1813 Returns : error message
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1814 Args : an optional argument to set the error message
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1815 Status : Public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1816
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1817 This method can be used to retrieve the last error message. Errors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1818 are not reset to empty by successful calls, so contents are only valid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1819 immediately after an error condition has been detected.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1820
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1821 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1822
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1823 sub error {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1824 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1825 my $g = $self->{error};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1826 $self->{error} = join '',@_ if @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1827 $g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1828 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1829
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1830 =head2 debug
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1831
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1832 Title : debug
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1833 Usage : $db->debug( [$flag] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1834 Function: read or set debug flag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1835 Returns : current value of debug flag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1836 Args : new debug flag (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1837 Status : Public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1838
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1839 This method can be used to turn on debug messages. The exact nature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1840 of those messages depends on the adaptor in use.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1841
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1842 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1843
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1844 sub debug {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1845 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1846 my $g = $self->{debug};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1847 $self->{debug} = shift if @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1848 $g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1849 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1850
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1851
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1852 =head2 automerge
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1853
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1854 Title : automerge
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1855 Usage : $db->automerge( [$new automerge] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1856 Function: get or set automerge value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1857 Returns : current value (boolean)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1858 Args : an optional argument to set the automerge value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1859 Status : Public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1860
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1861 By default, this module will use the aggregators to merge groups into
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1862 single composite objects. This default can be changed to false by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1863 calling automerge(0).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1864
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1865 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1866
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1867 sub automerge {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1868 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1869 my $g = $self->{automerge};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1870 $self->{automerge} = shift if @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1871 $g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1872 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1873
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1874 =head2 attributes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1875
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1876 Title : attributes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1877 Usage : @attributes = $db->attributes($id,$name)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1878 Function: get the "attributes" on a particular feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1879 Returns : an array of strings
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1880 Args : feature ID, attribute name (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1881 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1882
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1883 Some GFF version 2 files use the groups column to store a series of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1884 attribute/value pairs. In this interpretation of GFF, the first such
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1885 pair is treated as the primary group for the feature; subsequent pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1886 are treated as attributes. Two attributes have special meaning:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1887 "Note" is for backward compatibility and is used for unstructured text
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1888 remarks. "Alias" is considered as a synonym for the feature name.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1889
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1890 If no name is provided, then attributes() returns a flattened hash, of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1891 attribute=E<gt>value pairs. This lets you do:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1892
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1893 %attributes = $db->attributes($id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1894
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1895 Normally, attributes() will be called by the feature:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1896
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1897 @notes = $feature->attributes('Note');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1898
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1899 In a scalar context, attributes() returns the first value of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1900 attribute if a tag is present, otherwise a hash reference in which the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1901 keys are attribute names and the values are anonymous arrays
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1902 containing the values.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1903
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1904 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1905
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1906 sub attributes {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1907 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1908 my ($id,$tag) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1909 my @result = $self->do_attributes($id,$tag) or return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1910 return @result if wantarray;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1911
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1912 # what to do in a scalar context
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1913 return $result[0] if $tag;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1914 my %result;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1915 while (my($key,$value) = splice(@result,0,2)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1916 push @{$result{$key}},$value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1917 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1918 return \%result;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1919 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1920
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1921 =head2 fast_queries
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1922
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1923 Title : fast_queries
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1924 Usage : $flag = $db->fast_queries([$flag])
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1925 Function: turn on and off the "fast queries" option
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1926 Returns : a boolean
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1927 Args : a boolean flag (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1928 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1929
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1930 The mysql database driver (and possibly others) supports a "fast" query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1931 mode that caches results on the server side. This makes queries come
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1932 back faster, particularly when creating iterators. The downside is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1933 that while iterating, new queries will die with a "command synch"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1934 error. This method turns the feature on and off.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1935
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1936 For databases that do not support a fast query, this method has no
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1937 effect.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1938
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1939 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1940
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1941 # override this method in order to set the mysql_use_result attribute, which is an obscure
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1942 # but extremely powerful optimization for both performance and memory.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1943 sub fast_queries {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1944 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1945 my $d = $self->{fast_queries};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1946 $self->{fast_queries} = shift if @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1947 $d;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1948 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1949
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1950 =head2 add_aggregator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1951
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1952 Title : add_aggregator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1953 Usage : $db->add_aggregator($aggregator)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1954 Function: add an aggregator to the list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1955 Returns : nothing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1956 Args : an aggregator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1957 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1958
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1959 This method will append an aggregator to the end of the list of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1960 registered aggregators. Three different argument types are accepted:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1961
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1962 1) a Bio::DB::GFF::Aggregator object -- will be added
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1963 2) a string in the form "aggregator_name{subpart1,subpart2,subpart3/main_method}"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1964 -- will be turned into a Bio::DB::GFF::Aggregator object (the /main_method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1965 part is optional).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1966 3) a valid Perl token -- will be turned into a Bio::DB::GFF::Aggregator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1967 subclass, where the token corresponds to the subclass name.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1968
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1969 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1970
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1971 sub add_aggregator {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1972 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1973 my $aggregator = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1974 my $list = $self->{aggregators} ||= [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1975 if (ref $aggregator) { # an object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1976 @$list = grep {$_->get_method ne $aggregator->get_method} @$list;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1977 push @$list,$aggregator;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1978 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1979
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1980 elsif ($aggregator =~ /^(\w+)\{([^\/\}]+)\/?(.*)\}$/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1981 my($agg_name,$subparts,$mainpart) = ($1,$2,$3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1982 my @subparts = split /,\s*/,$subparts;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1983 my @args = (-method => $agg_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1984 -sub_parts => \@subparts);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1985 push @args,(-main_method => $mainpart) if $mainpart;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1986 warn "making an aggregator with (@args), subparts = @subparts" if $self->debug;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1987 push @$list,Bio::DB::GFF::Aggregator->new(@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1988 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1989
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1990 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1991 my $class = "Bio::DB::GFF::Aggregator::\L${aggregator}\E";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1992 eval "require $class";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1993 $self->throw("Unable to load $aggregator aggregator: $@") if $@;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1994 push @$list,$class->new();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1995 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1996 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1997
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1998 =head2 aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1999
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2000 Title : aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2001 Usage : $db->aggregators([@new_aggregators]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2002 Function: retrieve list of aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2003 Returns : list of aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2004 Args : a list of aggregators to set (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2005 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2006
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2007 This method will get or set the list of aggregators assigned to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2008 the database. If 1 or more arguments are passed, the existing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2009 set will be cleared.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2010
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2011 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2012
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2013 sub aggregators {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2014 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2015 my $d = $self->{aggregators};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2016 if (@_) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2017 $self->clear_aggregators;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2018 $self->add_aggregator($_) foreach @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2019 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2020 return unless $d;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2021 return @$d;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2022 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2023
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2024 =head2 clear_aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2025
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2026 Title : clear_aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2027 Usage : $db->clear_aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2028 Function: clears list of aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2029 Returns : nothing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2030 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2031 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2032
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2033 This method will clear the aggregators stored in the database object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2034 Use aggregators() or add_aggregator() to add some back.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2035
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2036 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2037
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2038 sub clear_aggregators { shift->{aggregators} = [] }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2039
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2040 =head1 Methods for use by Subclasses
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2041
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2042 The following methods are chiefly of interest to subclasses and are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2043 not intended for use by end programmers.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2044
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2045 =head2 abscoords
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2046
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2047 Title : abscoords
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2048 Usage : $db->abscoords($name,$class,$refseq)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2049 Function: finds position of a landmark in reference coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2050 Returns : ($ref,$class,$start,$stop,$strand)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2051 Args : name and class of landmark, and an optional reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2052 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2053
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2054 This method is called by Bio::DB::GFF::RelSegment to obtain the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2055 absolute coordinates of a sequence landmark. The arguments are the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2056 name and class of the landmark. If successful, abscoords() returns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2057 the ID of the reference sequence, its class, its start and stop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2058 positions, and the orientation of the reference sequence's coordinate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2059 system ("+" for forward strand, "-" for reverse strand).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2060
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2061 If $refseq is present in the argument list, it forces the query to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2062 search for the landmark in a particular reference sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2063
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2064 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2065
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2066 sub abscoords {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2067 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2068 my ($name,$class,$refseq) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2069 $class ||= $self->{default_class};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2070 $self->get_abscoords($name,$class,$refseq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2071 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2072
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2073 =head1 Protected API
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2074
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2075 The following methods are not intended for public consumption, but are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2076 intended to be overridden/implemented by adaptors.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2077
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2078 =head2 default_aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2079
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2080 Title : default_aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2081 Usage : $db->default_aggregators;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2082 Function: retrieve list of aggregators
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2083 Returns : array reference containing list of aggregator names
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2084 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2085 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2086
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2087 This method (which is intended to be overridden by adaptors) returns a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2088 list of standard aggregators to be applied when no aggregators are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2089 specified in the constructor.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2090
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2091 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2092
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2093 sub default_aggregators {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2094 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2095 return ['processed_transcript','alignment'];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2096 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2097
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2098 =head2 do_load_gff
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2099
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2100 Title : do_load_gff
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2101 Usage : $db->do_load_gff($handle)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2102 Function: load a GFF input stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2103 Returns : number of features loaded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2104 Args : A filehandle.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2105 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2106
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2107 This method is called to load a GFF data stream. The method will read
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2108 GFF features from the supplied filehandle and load them into the database. On exit the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2109 method must return the number of features loaded.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2111 Note that the method is responsible for parsing the GFF lines. This
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2112 is to allow for differences in the interpretation of the "group"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2113 field, which are legion.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2114
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2115 You probably want to use load_gff() instead. It is more flexible
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2116 about the arguments it accepts.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2118 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2120 # load from the supplied filehandle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2121 sub do_load_gff {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2122 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2123 my $io_handle = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2125 local $self->{gff3_flag} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2126 $self->setup_load();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2128 my $fasta_sequence_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2130 while (<$io_handle>) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2131 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2132 $self->{gff3_flag}++ if /^\#\#gff-version\s+3/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2133 if (/^>(\S+)/) { # uh oh, sequence coming
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2134 $fasta_sequence_id = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2135 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2136 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2137 if (/^\#\#\s*sequence-region\s+(\S+)\s+(\d+)\s+(\d+)/i) { # header line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2138 $self->load_gff_line(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2139 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2140 ref => $1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2141 class => 'Sequence',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2142 source => 'reference',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2143 method => 'Component',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2144 start => $2,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2145 stop => $3,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2146 score => undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2147 strand => undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2148 phase => undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2149 gclass => 'Sequence',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2150 gname => $1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2151 tstart => undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2152 tstop => undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2153 attributes => [],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2154 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2155 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2156 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2157 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2159 next if /^\#/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2160 my ($ref,$source,$method,$start,$stop,$score,$strand,$phase,$group) = split "\t";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2161 next unless defined($ref) && defined($method) && defined($start) && defined($stop);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2162 foreach (\$score,\$strand,\$phase) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2163 undef $$_ if $$_ eq '.';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2164 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2165
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2166 my ($gclass,$gname,$tstart,$tstop,$attributes) = $self->split_group($group,$self->{gff3_flag});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2167
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2168 # no standard way in the GFF file to denote the class of the reference sequence -- drat!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2169 # so we invoke the factory to do it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2170 my $class = $self->refclass($ref);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2172 # call subclass to do the dirty work
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2173 if ($start > $stop) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2174 ($start,$stop) = ($stop,$start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2175 if ($strand eq '+') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2176 $strand = '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2177 } elsif ($strand eq '-') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2178 $strand = '+';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2179 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2180 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2181 $self->load_gff_line({ref => $ref,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2182 class => $class,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2183 source => $source,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2184 method => $method,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2185 start => $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2186 stop => $stop,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2187 score => $score,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2188 strand => $strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2189 phase => $phase,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2190 gclass => $gclass,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2191 gname => $gname,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2192 tstart => $tstart,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2193 tstop => $tstop,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2194 attributes => $attributes}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2195 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2196 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2198 my $result = $self->finish_load();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2199 $result += $self->load_sequence($io_handle,$fasta_sequence_id)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2200 if defined $fasta_sequence_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2201 $result;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2203 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2204
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2205 =head2 load_sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2207 Title : load_sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2208 Usage : $db->load_sequence($handle [,$id])
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2209 Function: load a FASTA data stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2210 Returns : number of sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2211 Args : a filehandle and optionally the ID of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2212 the first sequence in the stream.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2213 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2214
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2215 You probably want to use load_fasta() instead. The $id argument is a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2216 hack used to switch from GFF loading to FASTA loading when load_gff()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2217 discovers FASTA data hiding at the bottom of the GFF file (as Artemis
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2218 does).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2219
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2220 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Read FASTA records from a filehandle and pass each one to insert_sequence().
#
# Arguments:
#   $io_handle - filehandle to read FASTA data from
#   $id        - optional ID of a sequence whose header line was already
#                consumed by the caller (hack for GFF files that contain
#                fasta data)
#
# Returns the number of FASTA header lines read from the handle.  A sequence
# introduced only through the $id argument is not included in that count.
sub load_sequence {
  my $self      = shift;
  my $io_handle = shift;
  my $id        = shift;   # hack for GFF files that contain fasta data

  # An ID is usable when it is defined and non-empty.  A plain truth test
  # would reject the legal sequence name "0" and silently drop its data.
  my $have_id = sub { defined $_[0] && length $_[0] };

  my ($seq,$offset,$loaded) = (undef,0,0);

  # A lexical line variable leaves the caller's $_ untouched.
  while (defined(my $line = <$io_handle>)) {
    chomp $line;
    if ($line =~ /^>(\S+)/) {
      my $next_id = $1;   # capture before calling out to other methods

      # flush whatever is still buffered for the previous sequence
      $self->insert_sequence($id,$offset,$seq) if $have_id->($id);

      ($id,$offset,$seq) = ($next_id,0,'');
      $loaded++;
    }
    elsif ($have_id->($id)) {
      # sequence data is only meaningful once we know which ID it belongs to
      $seq .= $line;
      $self->insert_sequence_chunk($id,\$offset,\$seq);
    }
  }

  # flush the last sequence in the stream
  $self->insert_sequence($id,$offset,$seq) if $have_id->($id);
  return $loaded+0;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Flush full-size chunks of buffered sequence to insert_sequence().
#
#   $id      - sequence ID, passed through to insert_sequence()
#   $offsetp - reference to the running offset; advanced by the length of
#              every chunk that is flushed
#   $seqp    - reference to the sequence buffer; each flushed chunk is
#              removed from its front
#
# Nothing is flushed when dna_chunk_size() returns a false value.
# Always returns 1.
sub insert_sequence_chunk {
  my ($self,$id,$offsetp,$seqp) = @_;

  my $chunk_size = $self->dna_chunk_size
    or return 1;   # chunking is switched off

  until (length($$seqp) < $chunk_size) {
    my $piece = substr($$seqp,0,$chunk_size);
    $self->insert_sequence($id,$$offsetp,$piece);
    $$offsetp += length($piece);
    substr($$seqp,0,$chunk_size,'');   # drop the piece just stored
  }

  return 1;  # the calling routine may expect success or failure
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2259
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Length of the pieces that big sequences are cut into before being stored.
# This implementation returns 0, which insert_sequence_chunk() treats as
# "do not split".
sub dna_chunk_size { return 0 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2264
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Store one piece of sequence.  Called with ($id,$offset,$seq).
# This implementation stores nothing: it only reports, through throw(),
# that a subclass has to supply the method.
sub insert_sequence {
  my ($self) = @_;   # $id, $offset and $seq are not used here
  return $self->throw('insert_sequence(): must be defined in subclass');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Get or set the class used for reference points.
# Without an argument, returns the current setting, which is 'Sequence'
# until a value has been stored.  With an argument, stores it and returns
# the setting that was in effect before the call.
sub default_class {
  my ($self,@new_value) = @_;

  my $previous = 'Sequence';
  $previous = $self->{default_class} if exists $self->{default_class};

  $self->{default_class} = $new_value[0] if @new_value;
  return $previous;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2278
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Takes the name of a reference sequence and returns its class.
# The name is not consulted at present; the result always comes from
# default_class().
sub refclass {
  my ($self,$name) = @_;   # $name is accepted but currently unused
  return $self->default_class;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2286
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2287 =head2 setup_load
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2289 Title : setup_load
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2290 Usage : $db->setup_load
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2291 Function: called before load_gff_line()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2292 Returns : void
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2293 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2294 Status : abstract
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2296 This abstract method gives subclasses a chance to do any
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2297 schema-specific initialization prior to loading a set of GFF records.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2298 It must be implemented by a subclass.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2299
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2300 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2301
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub setup_load {
  # Hook with no default behavior: nothing is done and nothing is returned.
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2306 =head2 finish_load
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2307
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2308 Title : finish_load
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2309 Usage : $db->finish_load
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2310 Function: called after load_gff_line()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2311 Returns : number of records loaded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2312 Args : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2313 Status : abstract
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2314
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2315 This method gives subclasses a chance to do any schema-specific
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2316 cleanup after loading a set of GFF records.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2317
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2318 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub finish_load {
  # No cleanup is performed by default, and no count is returned.
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2323
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2324 =head2 load_gff_line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2325
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2326 Title : load_gff_line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2327 Usage : $db->load_gff_line(@args)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2328 Function: called to load one parsed line of GFF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2329 Returns : true if successfully inserted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2330 Args : see below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2331 Status : abstract
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2333 This abstract method is called once per line of the GFF and passed a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2334 hashref containing parsed GFF fields. The fields are:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2335
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2336 {ref => $ref,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2337 class => $class,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2338 source => $source,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2339 method => $method,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2340 start => $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2341 stop => $stop,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2342 score => $score,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2343 strand => $strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2344 phase => $phase,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2345 gclass => $gclass,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2346 gname => $gname,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2347 tstart => $tstart,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2348 tstop => $tstop,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2349 attributes => $attributes}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2350
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2351 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2352
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub load_gff_line {
  # Nothing can be loaded here; report that an adaptor has to supply
  # the method.  The hashref of parsed GFF fields is ignored.
  my ($self) = @_;
  return $self->throw("load_gff_line(): must be implemented by an adaptor");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2357
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2358 =head2 do_initialize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2359
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2360 Title : do_initialize
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2361 Usage : $db->do_initialize([$erase])
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2362 Function: initialize and possibly erase database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2363 Returns : true if successful
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2364 Args : optional erase flag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2365 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2366
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2367 This method implements the initialize() method described above, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2368 takes the same arguments.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2369
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2370 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2371
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub do_initialize {
  # No initialization is possible here; report that an adaptor has to
  # supply the method.  The optional erase flag is ignored.
  my ($self) = @_;
  return $self->throw('do_initialize(): must be implemented by an adaptor');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2375
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2376 =head2 dna
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2377
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2378 Title : dna
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2379 Usage : $db->dna($id,$start,$stop,$class)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2380 Function: return the raw DNA string for a segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2381 Returns : a raw DNA string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2382 Args : id of the sequence, start and stop positions, and its class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2383 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2384
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2385 This method is invoked by Bio::DB::GFF::Segment to fetch the raw DNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2386 sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2388 Arguments: -name sequence name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2389 -start start position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2390 -stop stop position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2391 -class sequence class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2392
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2393 If start and stop are both undef, then the entire DNA is retrieved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2394 So to fetch the whole dna, call like this:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2395
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2396 $db->dna($name_of_sequence);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2397
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2398 or like this:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2399
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2400 $db->dna(-name=>$name_of_sequence,-class=>$class_of_sequence);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2401
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2402 NOTE: you will probably prefer to create a Segment and then invoke its
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2403 dna() method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2404
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2405 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2406
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Return the DNA string for the indicated region.
# The argument list is passed through rearrange() together with the
# parameter table below, and the four resulting values are handed to
# get_dna(), which does the real work.
sub dna {
  my $self = shift;

  # one entry per value returned by rearrange(), in order;
  # an array reference groups alternative names for the same value
  my @arg_map = (
    [qw(NAME ID REF REFSEQ)],
    'START',
    [qw(STOP END)],
    'CLASS',
  );
  my ($id,$start,$stop,$class) = rearrange(\@arg_map,@_);

  # $start and $stop are forwarded as-is, even when undefined
  return $self->get_dna($id,$start,$stop,$class);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2420
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Validate a range query and hand it to _features().
# The argument list is passed through rearrange() with the parameter table
# below.  The range type is lower-cased and must be a key of
# %valid_range_types, otherwise throw() is called.  The return value is
# whatever _features() returns.
sub features_in_range {
  my $self = shift;

  # one entry per value returned by rearrange(), in order;
  # an array reference groups alternative names for the same value
  my @arg_map = (
    [qw(RANGE_TYPE)],
    [qw(REF REFSEQ)],
    'CLASS',
    'START',
    [qw(STOP END)],
    [qw(TYPE TYPES)],
    'PARENT',
    [qw(RARE SPARSE)],
    [qw(MERGE AUTOMERGE)],
    'ITERATOR',
  );
  my ($range_type,$refseq,$class,$start,$stop,$types,
      $parent,$sparse,$automerge,$iterator,$other) = rearrange(\@arg_map,@_);

  $other ||= {};

  # when the caller did not say, merge only if types were requested and
  # the object-wide automerge setting is on
  unless (defined $automerge) {
    $automerge = $types && $self->automerge;
  }

  my $rangetype = lc $range_type;
  unless ($valid_range_types{$rangetype}) {
    $self->throw("range type must be one of {".
                 join(',',keys %valid_range_types).
                 "}\n");
  }

  my %search = (
    rangetype => $rangetype,
    refseq    => $refseq,
    refclass  => $class,
    start     => $start,
    stop      => $stop,
    types     => $types,
  );

  # entries from %$other come last so that they win over the named options
  my %options = (
    sparse    => $sparse,
    automerge => $automerge,
    iterator  => $iterator,
    %$other,
  );

  return $self->_features(\%search,\%options,$parent);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2458 =head2 get_dna
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2459
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2460 Title : get_dna
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2461 Usage : $db->get_dna($id,$start,$stop,$class)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2462 Function: get DNA for indicated segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2463 Returns : the dna string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2464 Args : sequence ID, start, stop and class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2465 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2466
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2467 If start E<gt> stop and the sequence is nucleotide, then this method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2468 should return the reverse complement. The sequence class may be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2469 ignored by those databases that do not recognize different object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2470 types.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2471
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2472 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2473
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub get_dna {
  # Called with ($id,$start,$stop,$class).  No DNA can be fetched here;
  # report that an adaptor has to supply the method.
  my ($self) = @_;
  return $self->throw("get_dna() must be implemented by an adaptor");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2480 =head2 get_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2481
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2482 Title : get_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2483 Usage : $db->get_features($search,$options,$callback)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2484 Function: get list of features for a region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2485 Returns : count of number of features retrieved
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2486 Args : see below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2487 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2488
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2489 The first argument is a hash reference containing search criteria for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2490 retrieving features. It contains the following keys:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2491
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2492 rangetype One of "overlaps", "contains" or "contained_in". Indicates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2493 the type of range query requested.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2494
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2495 refseq ID of the landmark that establishes the absolute
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2496 coordinate system.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2498 refclass Class of this landmark. Can be ignored by implementations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2499 that don't recognize such distinctions.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2500
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2501 start Start of the range, inclusive.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2502
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2503 stop Stop of the range, inclusive.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2505 types Array reference containing the list of annotation types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2506 to fetch from the database. Each annotation type is an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2507 array reference consisting of [source,method].
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2508
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2509 The second argument is a hash reference containing certain options
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2510 that affect the way information is retrieved:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2511
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2512 sort_by_group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2513 A flag. If true, means that the returned features should be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2514 sorted by the group that they're in.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2515
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2516 sparse A flag. If true, means that the expected density of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2517 features is such that it will be more efficient to search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2518 by type rather than by range. If it is taking a long
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2519 time to fetch features, give this a try.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2520
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2521 binsize A true value will create a set of artificial features whose
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2522 start and stop positions indicate bins of the given size, and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2523 whose scores are the number of features in the bin. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2524 class of the feature will be set to "bin", and its name to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2525 "method:source". This is a handy way of generating histograms
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2526 of feature density.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2527
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2528 The third argument, the $callback, is a code reference to which
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2529 retrieved features are passed. It is described in more detail below.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2530
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2531 This routine is responsible for getting arrays of GFF data out of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2532 database and passing them to the callback subroutine. The callback
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2533 does the work of constructing a Bio::DB::GFF::Feature object out of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2534 that data. The callback expects a list of 13 fields:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2535
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2536 $refseq The reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2537 $start feature start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2538 $stop feature stop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2539 $source feature source
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2540 $method feature method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2541 $score feature score
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2542 $strand feature strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2543 $phase feature phase
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2544 $groupclass group class (may be undef)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2545 $groupname group ID (may be undef)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2546 $tstart target start for similarity hits (may be undef)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2547 $tstop target stop for similarity hits (may be undef)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2548 $feature_id A unique feature ID (may be undef)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2549
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2550 These fields are in the same order as the raw GFF file, with the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2551 exception that the group column has been parsed into group class and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2552 group name fields.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2553
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2554 The feature ID, if provided, is a unique identifier of the feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2555 line. The module does not depend on this ID in any way, but it is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2556 available via Bio::DB::GFF-E<gt>id() if wanted. In the dbi::mysql and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2557 dbi::mysqlopt adaptor, the ID is a unique row ID. In the acedb
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2558 adaptor it is not used.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2559
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2560 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2561
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub get_features {
  # Called with ($search,$options,$callback).  No features can be fetched
  # here; report that an adaptor has to supply the method.
  my ($self) = @_;
  return $self->throw("get_features() must be implemented by an adaptor");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2567
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2568
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2569 =head2 _feature_by_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2570
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2571 Title : _feature_by_name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2572 Usage : $db->_feature_by_name($class,$name,$location,$callback)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2573 Function: get a list of features by name and class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2574 Returns : count of number of features retrieved
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2575 Args : class of feature, name of feature, location, and a callback
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2576 Status : abstract
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2577
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2578 This method is used internally. The callback arguments are the same
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2579 as those used by make_feature(). This method must be overridden by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2580 subclasses.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2581
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2582 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2583
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Abstract method: always throws, because name/class lookup has to be
# provided by the adaptor.  Arguments are unpacked only to show the
# expected calling convention ($class,$name,$location,$callback).
sub _feature_by_name {
  my ($self,$class,$name,$location,$callback) = @_;
  return $self->throw("_feature_by_name() must be implemented by an adaptor");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2589
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Abstract method: fetch features by attribute.  The base class only
# throws; an adaptor must override it.
#
# Args   : $attributes - attribute specification (format is defined by the
#                        adaptor; not visible here -- TODO confirm)
#          $callback   - callback invoked by the adaptor implementation
# Throws : always, via $self->throw()
#
# The message used to name _feature_by_name(), which pointed people at the
# wrong method when an adaptor lacked this override.
sub _feature_by_attribute {
  my $self = shift;
  my ($attributes,$callback) = @_;
  $self->throw("_feature_by_attribute() must be implemented by an adaptor");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2595
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2596 =head2 _feature_by_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2597
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2598 Title : _feature_by_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2599 Usage : $db->_feature_by_id($ids,$type,$callback)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2600 Function: get features based on their feature ID or group ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2601 Returns : count of number of features retrieved
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2602 Args : arrayref to feature IDs to fetch
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2603 Status : abstract
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2604
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2605 This method is used internally to fetch features either by their ID or
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2606 their group ID. $ids is an arrayref containing a list of IDs, $type is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2607 one of "feature" or "group", and $callback is a callback. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2608 callback arguments are the same as those used by make_feature(). This
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2609 method must be overridden by subclasses.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2610
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2611 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2612
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Abstract method: always throws, because lookup by feature/group ID has
# to be provided by the adaptor.  Arguments are unpacked only to show the
# expected calling convention ($ids,$type,$callback).
sub _feature_by_id {
  my ($self,$ids,$type,$callback) = @_;
  return $self->throw("_feature_by_id() must be implemented by an adaptor");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2618
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2619 =head2 overlapping_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2620
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2621 Title : overlapping_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2622 Usage : $db->overlapping_features(@args)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2623 Function: get features that overlap the indicated range
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2624 Returns : a list of Bio::DB::GFF::Feature objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2625 Args : see below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2626 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2628 This method is invoked by Bio::DB::GFF::Segment-E<gt>features() to find
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2629 the list of features that overlap a given range. It is generally
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2630 preferable to create the Segment first, and then fetch the features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2631
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2632 This method takes a set of named arguments:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2633
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2634 -refseq ID of the reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2635 -class Class of the reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2636 -start Start of the desired range in refseq coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2637 -stop Stop of the desired range in refseq coordinates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2638 -types List of feature types to return. Argument is an array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2639 reference containing strings of the format "method:source"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2640 -parent A parent Bio::DB::GFF::Segment object, used to create
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2641 relative coordinates in the generated features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2642 -rare Turn on an optimization suitable for a relatively rare feature type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2643 where it will be faster to filter by feature type first
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2644 and then by position, rather than vice versa.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2645 -merge Whether to apply aggregators to the generated features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2646 -iterator Whether to return an iterator across the features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2647
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2648 If -iterator is true, then the method returns a single scalar value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2649 consisting of a Bio::SeqIO object. You can call next_seq() repeatedly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2650 on this object to fetch each of the features in turn. If iterator is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2651 false or absent, then all the features are returned as a list.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2652
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2653 Currently aggregation is disabled when iterating over a series of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2654 features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2655
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2656 Types are indicated using the nomenclature "method:source". Either of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2657 these fields can be omitted, in which case a wildcard is used for the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2658 missing field. Type names without the colon (e.g. "exon") are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2659 interpreted as the method name and a source wild card. Regular
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2660 expressions are allowed in either field, as in: "similarity:BLAST.*".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2661
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2662 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2663
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Return the features that overlap the named region.
# Thin wrapper: prepends -range_type => 'overlaps' to the caller's named
# arguments and delegates to features_in_range(), in the caller's context.
sub overlapping_features {
  my $self = shift;
  return $self->features_in_range(-range_type => 'overlaps', @_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2670
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2671 =head2 contained_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2672
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2673 Title : contained_features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2674 Usage : $db->contained_features(@args)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2675 Function: get features that are contained within the indicated range
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2676 Returns : a list of Bio::DB::GFF::Feature objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2677 Args : see overlapping_features()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2678 Status : public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2679
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2680 This call is similar to overlapping_features(), except that it only
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2681 retrieves features whose end points are completely contained within
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2682 the specified range.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2683
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2684 Generally you will want to fetch a Bio::DB::GFF::Segment object and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2685 call its contained_features() method rather than call this directly.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2686
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2687 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Like overlapping_features(), but restricted to features that lie
# completely within the range (usually much faster).
# Thin wrapper: prepends -range_type => 'contains' to the caller's named
# arguments and delegates to features_in_range(), in the caller's context.
sub contained_features {
  my $self = shift;
  return $self->features_in_range(-range_type => 'contains', @_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2695
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2696 =head2 contained_in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2697
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2698 Title : contained_in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2699 Usage : @features = $s->contained_in(@args)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2700 Function: get features that contain this segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2701 Returns : a list of Bio::DB::GFF::Feature objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2702 Args : see features()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2703 Status : Public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2704
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2705 This is identical in behavior to features() except that it returns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2706 only those features that completely contain the segment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2707
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2708 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2709
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Thin wrapper: prepends -range_type => 'contained_in' to the caller's
# named arguments and delegates to features_in_range(), in the caller's
# context.
sub contained_in {
  my $self = shift;
  return $self->features_in_range(-range_type => 'contained_in', @_);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2714
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2715 =head2 get_abscoords
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2716
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2717 Title : get_abscoords
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2718 Usage : $db->get_abscoords($name,$class,$refseq)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2719 Function: get the absolute coordinates of sequence with name & class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2720 Returns : ($absref,$absstart,$absstop,$absstrand)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2721 Args : name and class of the landmark
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2722 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2723
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2724 Given the name and class of a genomic landmark, this function returns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2725 a four-element array consisting of:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2726
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2727 $absref the ID of the reference sequence that contains this landmark
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2728 $absstart the position at which the landmark starts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2729 $absstop the position at which the landmark stops
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2730 $absstrand the strand of the landmark, relative to the reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2731
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2732 If $refseq is provided, the function searches only within the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2733 specified reference sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2734
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2735 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2736
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Abstract method: always throws, because resolving a landmark to absolute
# coordinates has to be provided by the adaptor.  Arguments are unpacked
# only to show the expected calling convention ($name,$class,$refseq).
sub get_abscoords {
  my ($self,$name,$class,$refseq) = @_;
  return $self->throw("get_abscoords() must be implemented by an adaptor");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2742
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2743 =head2 get_types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2744
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2745 Title : get_types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2746 Usage : $db->get_types($absref,$class,$start,$stop,$count)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2747 Function: get list of all feature types on the indicated segment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2748 Returns : list or hash of Bio::DB::GFF::Typename objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2749 Args : see below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2750 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2751
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2752 Arguments are:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2753
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2754 $absref the ID of the reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2755 $class the class of the reference sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2756 $start the position to start counting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2757 $stop the position to end counting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2758 $count a boolean indicating whether to count the number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2759 of occurrences of each feature type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2760
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2761 If $count is true, then a hash is returned. The keys of the hash are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2762 feature type names in the format "method:source" and the values are
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2763 the number of times a feature of this type overlaps the indicated
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2764 segment. Otherwise, the call returns a set of Bio::DB::GFF::Typename
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2765 objects. If $start or $stop are undef, then all features on the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2766 indicated segment are enumerated. If $absref is undef, then the call
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2767 returns all feature types in the database.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2768
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2769 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2770
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Abstract method: always throws, because enumerating feature types has to
# be provided by the adaptor.  Arguments are unpacked only to show the
# expected calling convention ($refseq,$class,$start,$stop,$count,$types).
sub get_types {
  my ($self,$refseq,$class,$start,$stop,$count,$types) = @_;
  return $self->throw("get_types() must be implemented by an adaptor");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2776
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2777 =head2 make_feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2778
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2779 Title : make_feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2780 Usage : $db->make_feature(@args)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2781 Function: Create a Bio::DB::GFF::Feature object from string data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2782 Returns : a Bio::DB::GFF::Feature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2783 Args : see below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2784 Status : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2785
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2786 This takes 14 arguments (really!):
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2787
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2788 $parent A Bio::DB::GFF::RelSegment object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2789 $group_hash A hashref containing unique list of GFF groups
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2790 $refname The name of the reference sequence for this feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2791 $refclass The class of the reference sequence for this feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2792 $start Start of feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2793 $stop Stop of feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2794 $source Feature source field
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2795 $method Feature method field
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2796 $score Feature score field
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2797 $strand Feature strand
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2798 $phase Feature phase
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2799 $group_class Class of feature group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2800 $group_name Name of feature group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2801 $tstart For homologies, start of hit on target
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2802 $tstop Stop of hit on target
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2803
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2804 The $parent argument, if present, is used to establish relative
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2805 coordinates in the resulting Bio::DB::GFF::Feature object. This allows one
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2806 feature to generate a list of other features that are relative to its
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2807 coordinate system (for example, finding the coordinates of the second
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2808 exon relative to the coordinates of the first).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2809
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2810 The $group_hash allows the group_class/group_name strings to be turned
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2811 into rich database objects via the make_object() method (see above).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2812 Because these objects may be expensive to create, $group_hash is used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2813 to uniquify them. The index of this hash is the composite key
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2814 {$group_class,$group_name,$tstart,$tstop}. Values are whatever object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2815 is returned by the make_object() method.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2816
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2817 The remainder of the fields are taken from the GFF line, with the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2818 exception that "Target" features, which contain information about the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2819 target of a homology search, are parsed into their components.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2820
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2821 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2822
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Turn one row of GFF data into a Bio::DB::GFF::Feature object.
#
# Positional arguments after $self, in order:
#   $parent       if a reference, the feature is built relative to it via
#                 new_from_parent() and $srcseq is not passed on
#   $group_hash   optional hash ref caching group objects that were
#                 already built, keyed on class/name/tstart/tstop
#   $srcseq       reference sequence; a false value makes the call return
#                 nothing (used to signal end-of-data during on-the-fly
#                 aggregation)
#   $start,$stop,$source,$method,$score,$strand,$phase
#   $group_class,$group_name,$tstart,$tstop
#   $db_id,$group_id
sub make_feature {
  my $self = shift;
  my ($parent,$group_hash,
      $srcseq,
      $start,$stop,$source,$method,$score,$strand,$phase,
      $group_class,$group_name,$tstart,$tstop,
      $db_id,$group_id) = @_;

  # Called without row data: nothing to build.
  return unless $srcseq;

  my $group;  # stays undef when the row carries no group
  if (defined $group_class && defined $group_name) {
    # Any false target coordinate is normalised to the empty string; the
    # normalised values are also what the feature constructor receives.
    $tstart ||= '';
    $tstop  ||= '';
    my @group_args = ($group_class,$group_name,$tstart,$tstop);
    if ($group_hash) {
      # Same composite key that $hash->{$a,$b,$c,$d} would produce.
      my $cache_key = join($;,@group_args);
      $group = ($group_hash->{$cache_key} ||= $self->make_object(@group_args));
    }
    else {
      $group = $self->make_object(@group_args);
    }
  }

  # An earlier workaround for broken GFF files guessed a missing $strand
  # from ($tstart <= $tstop); it was disabled because of undesired side
  # effects and is intentionally not reinstated here.

  # Both constructors take the same trailing arguments; note that method
  # precedes source here, the reverse of the incoming order.
  my @feature_args = ($start,$stop,
                      $method,$source,
                      $score,$strand,$phase,
                      $group,$db_id,$group_id,
                      $tstart,$tstop);

  return ref $parent
    ? Bio::DB::GFF::Feature->new_from_parent($parent,@feature_args)
    : Bio::DB::GFF::Feature->new($self,$srcseq,@feature_args);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2877
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Build a feature from one row and batch consecutive features that share a
# group, so the aggregators can process each group as a unit.
#
# Args   : $accumulated_features  array ref used as the running batch; it
#                                 is modified in place
#          $parent                passed through to make_feature()
#          $aggregators           array ref of objects with an aggregate()
#                                 method
#          remaining arguments    row data, passed through to make_feature()
# Returns: an array ref of finished features, or nothing while a batch is
#          still being accumulated
sub make_aggregated_feature {
  my $self = shift;
  my ($accumulated_features,$parent,$aggregators) = splice(@_,0,3);
  my $feature = $self->make_feature($parent,undef,@_);

  # An ungrouped feature cannot be aggregated; hand it back immediately.
  if ($feature && !$feature->group) {
    return [$feature];
  }

  local $^W = 0;  # the group comparison below may involve undefined values

  # The batch is complete when something has accumulated and either
  #  (1) make_feature() returned undef (end of data), or
  #  (2) the new feature belongs to a different group than the last one.
  my $batch_complete = @$accumulated_features
    && (!defined($feature)
        || ($accumulated_features->[-1]->group ne $feature->group));

  unless ($batch_complete) {
    # NOTE(review): $feature may be undef here (end of data with nothing
    # accumulated) and is pushed anyway; callers may depend on that, so
    # confirm before changing it.
    push @$accumulated_features,$feature;
    return;
  }

  # Aggregators run in list order and may rewrite the batch in place.
  foreach my $aggregator (@$aggregators) {
    $aggregator->aggregate($accumulated_features,$self) or next;
  }

  my @result = @$accumulated_features;
  # Start the next batch with the feature that closed this one, if any.
  @$accumulated_features = $feature ? ($feature) : ();
  return unless @result;
  return \@result;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2902
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2903 =head2 parse_types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2904
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2905 Title : parse_types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2906 Usage : $db->parse_types(@args)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2907 Function: parses list of types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2908 Returns : an array ref containing ['method','source'] pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2909 Args : a list of types in 'method:source' form
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2910 Status : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2911
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2912 This method takes an array of type names in the format "method:source"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2913 and returns an array reference of ['method','source'] pairs. It will
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2914 also accept a single argument consisting of an array reference with
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2915 the list of type names.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2916
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2917 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2918
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Convert type names of the form "method:source" into [method,source]
# pairs.
#
# Args   : a list of type names, or a single array ref holding them
# Returns: an array ref of array refs.  Each name is split on the first
#          ':' only, so a name without a colon yields a one-element
#          [method].  An array ref whose first element is itself a
#          reference is treated as already parsed and returned unchanged.
#          No arguments, or an undefined first argument, yields [].
sub parse_types {
  my $self  = shift;
  my $first = $_[0];

  return [] unless @_ && defined $first;

  # Already in parsed form: hand the caller's structure straight back.
  if (ref $first eq 'ARRAY' && ref $first->[0]) {
    return $first;
  }

  my @names = ref($first) ? @$first : @_;
  my @type_list;
  foreach my $name (@names) {
    push @type_list, [split(':',$name,2)];
  }
  return \@type_list;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2928
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2929 =head2 make_match_sub
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2930
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2931 Title : make_match_sub
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2932 Usage : $db->make_match_sub($types)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2933 Function: creates a subroutine used for filtering features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2934 Returns : a code reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2935 Args : a list of parsed type names
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2936 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2937
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2938 This method is used internally to generate a code subroutine that will
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2939 accept or reject a feature based on its method and source. It takes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2940 an array of parsed type names in the format returned by parse_types(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2941 and generates an anonymous subroutine. The subroutine takes a single
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2942 Bio::DB::GFF::Feature object and returns true if the feature matches
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2943 one of the desired feature types, and false otherwise.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2944
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2945 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2946
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2947 # a subroutine that matches features indicated by list of types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub make_match_sub {
  # Build (and cache) a predicate that accepts or rejects a feature by type.
  #
  # Args    : $types - array reference of [method,source] pairs in the format
  #                    returned by parse_types().  A false method matches any
  #                    method; a false source matches any source or none.
  # Returns : a code reference taking one feature object.  It returns true
  #           when the feature's type ("method" or "method:source") matches
  #           one of the requested types, case-insensitively.  With no types
  #           the returned sub accepts everything.
  my $self  = shift;
  my $types = shift;

  return sub { 1 } unless ref $types && @$types;

  my @expr;
  for my $type (@$types) {
    my ($method,$source) = @$type;
    # Type names are literal strings, not patterns: escape them so that
    # characters such as '.', '+', '|' or '/' cannot alter the match.
    $method = $method ? quotemeta($method)       : '.*';
    $source = $source ? ':' . quotemeta($source) : '(?::.+)?';
    push @expr, $method . $source;
  }
  my $expr = join '|',@expr;

  # one compiled predicate per distinct type list
  return $self->{match_subs}{$expr} if $self->{match_subs}{$expr};

  warn "match sub: /^($expr)\$/i\n" if $self->debug;

  # Precompile once and close over the pattern; no string eval is needed.
  my $pattern = qr/^($expr)$/i;
  my $compiled_sub = sub {
    my $feature = shift or return;
    return $feature->type =~ $pattern;
  };
  return $self->{match_subs}{$expr} = $compiled_sub;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2975
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2976 =head2 make_object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2977
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2978 Title : make_object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2979 Usage : $db->make_object($class,$name,$start,$stop)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2980 Function: creates a feature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2981 Returns : a feature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2982 Args : see below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2983 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2984
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2985 This method is called to make an object from the GFF "group" field.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2986 By default, all Target groups are turned into Bio::DB::GFF::Homol
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2987 objects, and everything else becomes a Bio::DB::GFF::Featname.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2988 However, adaptors are free to override this method to generate more
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2989 interesting objects, such as true BioPerl objects, or Acedb objects.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2990
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2991 Arguments are:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2992
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2993 $name database ID for object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2994 $class class of object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2995 $start for similarities, start of match inside object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2996 $stop for similarities, stop of match inside object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2997
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2998 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2999
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3000 # abstract call to turn a feature into an object, given its class and name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub make_object {
  # Turn a GFF group into an object.
  #
  # Args    : $class, $name - class and name of the group
  #           $start, $stop - for similarities, extent of the match inside
  #                           the target
  # Returns : a Bio::DB::GFF::Homol when a non-empty $start is supplied,
  #           otherwise a Bio::DB::GFF::Featname
  my ($self, $class, $name, $start, $stop) = @_;

  # a target start position is what marks the group as a similarity
  my $is_similarity = defined($start) && length($start);

  unless ($is_similarity) {
    return Bio::DB::GFF::Featname->new($class,$name);
  }
  return Bio::DB::GFF::Homol->new($self,$class,$name,$start,$stop);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3008
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3009
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3010 =head2 do_attributes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3011
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3012 Title : do_attributes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3013 Usage : $db->do_attributes($id [,$tag]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3014 Function: internal method to retrieve attributes given an id and tag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3015 Returns : a list of attribute values, or a flattened list of (tag, value) pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3016 Args : a feature id and an attribute tag (optional)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3017 Status : protected
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3018
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3019 This method is overridden by subclasses in order to return a list of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3020 attributes. If called with a tag, returns the value of attributes of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3021 that tag type. If called without a tag, returns a flattened array of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3022 (tag=E<gt>value) pairs. A particular tag can be present multiple times.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3023
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3024 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3025
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub do_attributes {
  # Base-class placeholder: always returns the empty list.
  #
  # Args    : $id  - feature id
  #           $tag - attribute tag (optional)
  # Returns : an empty list
  my ($self, $id, $tag) = @_;
  return ();
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3031
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3032
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3033
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3034 =head1 Internal Methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3035
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3036 The following methods are internal to Bio::DB::GFF and are not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3037 guaranteed to remain the same.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3038
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3039 =head2 _features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3040
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3041 Title : _features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3042 Usage : $db->_features($search,$options,$parent)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3043 Function: internal method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3044 Returns : a list of Bio::DB::GFF::Feature objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3045 Args : see below
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3046 Status : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3047
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3048 This is an internal method that is called by overlapping_features(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3049 contained_features() and features() to create features based on a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3050 parent segment's coordinate system. It takes three arguments, a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3051 search options hashref, an options hashref, and a parent segment.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3052
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3053 The search hashref contains the following keys:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3054
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3055 rangetype One of "overlaps", "contains" or "contained_in". Indicates
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3056 the type of range query requested.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3057 refseq reference sequence ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3058 refclass reference sequence class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3059 start start of range
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3060 stop stop of range
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3061 types arrayref containing list of types in "method:source" form
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3062
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3063 The options hashref contains zero or more of the following keys:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3064
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3065 sparse turn on optimizations for a rare feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3066 automerge if true, invoke aggregators to merge features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3067 iterator if true, return an iterator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3068
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3069 The $parent argument is a scalar object containing a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3070 Bio::DB::GFF::RelSegment object or descendent.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3071
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3072 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3073
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3074 #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3075
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _features {
  # Run a feature query in the coordinate system of a parent segment.
  #
  # Args    : $search  - hashref describing the query (start, stop, types, ...)
  #           $options - hashref; the keys read here are automerge and iterator
  #           $parent  - parent segment handed on to the feature constructors
  # Returns : the result of get_features_iterator() when $options->{iterator}
  #           is true; otherwise the list of features collected from
  #           get_features(), after aggregation if automerge is on
  my ($self, $search, $options, $parent) = @_;

  # put the range the right way round (note: this updates the caller's hash)
  if (defined($search->{start}) && $search->{start} > $search->{stop}) {
    @{$search}{qw(start stop)} = @{$search}{qw(stop start)};
  }

  # parsed types; the aggregators get to rewrite a working copy of the list
  my $types            = $self->parse_types($search->{types});
  my @aggregated_types = @$types;

  # Keep only the aggregators that claim one of the requested types.  They
  # are stored in reverse order so that the last one gets the first shot.
  my @aggregators;
  if ($options->{automerge}) {
    for my $aggregator ($self->aggregators) {
      $aggregator = $aggregator->clone if $options->{iterator};
      next unless $aggregator->disaggregate(\@aggregated_types,$self);
      unshift @aggregators,$aggregator;
    }
  }

  my $query = { %$search, types => \@aggregated_types };

  if ($options->{iterator}) {
    my @accumulated_features;
    my $callback;
    if ($options->{automerge}) {
      $callback = sub { $self->make_aggregated_feature(\@accumulated_features,$parent,\@aggregators,@_) };
    }
    else {
      $callback = sub { [$self->make_feature($parent,undef,@_)] };
    }
    my %iterator_options = (%$options, sort_by_group => $options->{automerge});
    return $self->get_features_iterator($query,\%iterator_options,$callback);
  }

  my %groups;    # shared with make_feature() so group objects can be reused
  my @features;

  my $collect = sub { push @features,$self->make_feature($parent,\%groups,@_) };
  $self->get_features($query,$options,$collect);

  if ($options->{automerge}) {
    warn "aggregating...\n" if $self->debug;
    for my $aggregator (@aggregators) {
      warn "Aggregator $aggregator:\n" if $self->debug;
      $aggregator->aggregate(\@features,$self);
    }
  }

  return @features;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3127 =head2 get_features_iterator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3129 Title : get_features_iterator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3130 Usage : $db->get_features_iterator($search,$options,$callback)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3131 Function: get an iterator on a features query
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3132 Returns : a Bio::SeqIO object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3133 Args : as per get_features()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3134 Status : Public
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3136 This method takes the same arguments as get_features(), but returns an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3137 iterator that can be used to fetch features sequentially, as per
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3138 Bio::SeqIO.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3140 Internally, this method is simply a front end to range_query().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3141 The latter method constructs and executes the query, returning a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3142 statement handle. This routine passes the statement handle to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3143 constructor for the iterator, along with the callback.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3145 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3146
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub get_features_iterator {
  # Abstract hook: the base class has no iterator support, so it throws.
  #
  # Args    : $search, $options, $callback - as per get_features()
  # Throws  : always, via $self->throw
  my ($self, $search, $options, $callback) = @_;
  $self->throw('feature iteration is not implemented in this adaptor');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3153 =head2 split_group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3155 Title : split_group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3156 Usage : $db->split_group($group_field,$gff3_flag)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3157 Function: parse GFF group field
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3158 Returns : ($gclass,$gname,$tstart,$tstop,$attributes)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3159 Args : the gff group column and a flag indicating gff3 compatibility
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3160 Status : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3162 This is a method that is called by load_gff_line to parse out the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3163 contents of one or more group fields. It returns the class of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3164 group, its name, the start and stop of the target, if any, and an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3165 array reference containing any attributes that were stuck into the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3166 group field, in [attribute_name,attribute_value] format.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3167
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3168 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub split_group {
  # Break the GFF group column into fields and pass them to the
  # format-specific parser.
  #
  # Args    : $group - text of the group column
  #           $gff3  - true if the column uses GFF3 syntax
  # Returns : whatever _split_gff3_group() or _split_gff2_group() returns
  my ($self, $group, $gff3) = @_;

  # GFF3: fields are separated by ';' or '&'
  if ($gff3) {
    my @fields = split /[;&]/,$group;
    return $self->_split_gff3_group(@fields);
  }

  # GFF2: semicolons that are backslash-escaped or that sit inside a quoted
  # string must not split the field, so they are swapped for the $;
  # placeholder before splitting and restored afterwards.
  $group =~ s/\\;/$;/g;
  1 while $group =~ s/( \"[^\"]*);([^\"]*\")/$1$;$2/;

  my @fields = map { (my $field = $_) =~ s/$;/;/g; $field }
               split(/\s*;\s*/,$group);
  return $self->_split_gff2_group(@fields);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3188 =head2 _split_gff2_group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3189
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3190 This is an internal method called by split_group().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3192 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _split_gff2_group {
  # Interpret the fields of a GFF2 group column.
  #
  # Args    : the list of "tag value" fields produced by split_group()
  # Returns : ($gclass,$gname,$tstart,$tstop,\@attributes), where
  #           @attributes holds [tag,value] pairs.  Elements that were not
  #           found are undef.
  #
  # Presence is tested with defined/length rather than truth, so that a
  # group name or attribute value of "0" is kept and not treated as missing.
  my $self   = shift;
  my @groups = @_;

  my ($gclass,$gname,$tstart,$tstop,@attributes);

  for (@groups) {

    my ($tag,$value) = /^(\S+)(?:\s+(.+))?/;
    $tag   = '' unless defined $tag;
    $value = '' unless defined $value;
    if ($value =~ /^\"(.+)\"$/) {  #remove quotes
      $value = $1;
    }
    $value =~ s/\\t/\t/g;
    $value =~ s/\\r/\r/g;

    # Any additional groups become part of the attributes hash
    # For historical reasons, the tag "Note" is treated as an
    # attribute, even if it is the only group.
    if ($tag eq 'Note' or (defined $gclass && defined $gname)) {
      push @attributes,[$tag => $value];
    }

    # if the tag eq 'Target' then the class name is embedded in the ID
    # as Class:Name, optionally followed by the target start and stop
    elsif ($tag eq 'Target' && /([^:\"\s]+):([^\"\s]+)/) {
      ($gclass,$gname) = ($1,$2);
      ($tstart,$tstop) = / (\d+) (\d+)/;
    }

    # a bare tag with no value is recorded as a note
    elsif (!length $value) {
      push @attributes,[Note => $tag];  # e.g. "Confirmed_by_EST"
    }

    # otherwise, the tag and value correspond to the
    # group class and name
    else {
      ($gclass,$gname) = ($tag,$value);
    }
  }

  return ($gclass,$gname,$tstart,$tstop,\@attributes);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3238
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3239 =head2 _split_gff3_group
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3241 This is called internally from split_group().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3242
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3243 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3244
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub _split_gff3_group {
  # Interpret the fields of a GFF3 attributes column.
  #
  # Args    : the list of "tag=value[,value...]" fields from split_group()
  # Returns : ($gclass,$gname,$tstart,$tstop,\@attributes), where
  #           @attributes holds [tag,value] pairs.  For Parent, ID and Target
  #           the first value names the group (class 'Sequence') and only the
  #           remaining values are stored as attributes.
  my $self   = shift;
  my @groups = @_;
  my ($gclass,$gname,$tstart,$tstop,@attributes);

  for my $group (@groups) {
    # Limit of 2: only the first '=' separates the tag from its value, so a
    # value that itself contains '=' is kept whole.
    my ($tag,$value) = split /=/,$group,2;
    next unless defined $tag;    # empty field: nothing to record

    $tag = unescape($tag);

    # a tag without any value contributes no values (and no undef warnings)
    my @values = defined $value ? (map {unescape($_)} split(/,/,$value)) : ();

    if ($tag eq 'Parent') {
      $gclass = 'Sequence';
      $gname  = shift @values;
    }
    elsif ($tag eq 'ID') {
      $gclass = 'Sequence';
      $gname  = shift @values;
    }
    elsif ($tag eq 'Target') {
      $gclass = 'Sequence';
      # the target value is split on whitespace into name, start and stop
      my $target = shift @values;
      ($gname,$tstart,$tstop) = defined $target ? split(/\s+/,$target) : ();
    }
    push @attributes,[$tag=>$_] foreach @values;
  }
  return ($gclass,$gname,$tstart,$tstop,\@attributes);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3271 =head2 _delete_features(), _delete_groups(),_delete()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3273 Title : _delete_features(), _delete_groups(),_delete()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3274 Usage : $count = $db->_delete_features(@feature_ids)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3275 $count = $db->_delete_groups(@group_ids)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3276 $count = $db->_delete(\%delete_spec)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3277 Function: low-level feature/group deleter
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3278 Returns : count of groups removed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3279 Args : list of feature or group ids removed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3280 Status : for implementation by subclasses
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3281
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3282 These methods need to be implemented in adaptors. For
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3283 _delete_features and _delete_groups, the arguments are a list of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3284 feature or group IDs to remove. For _delete(), the argument is a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3285 hashref with the three keys 'segments', 'types' and 'force'. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3286 first contains an arrayref of Bio::DB::GFF::RelSegment objects to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3287 delete (all FEATURES within the segment are deleted). The second
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3288 contains an arrayref of [method,source] feature types to delete. The
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3289 two are ANDed together. If 'force' has a true value, this forces the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3290 operation to continue even if it would delete all features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3292 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3293
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Abstract hook for deleting features by ID.
#
# Args    : $self plus a list of feature IDs to remove.
# Returns : never returns here; adaptors that override it are documented
#           as returning a count.
# Throws  : always, via $self->throw(), because this base-class version is
#           only a placeholder for adaptor subclasses to override.
sub _delete_features {
  # @feature_ids is not used by the stub; it is named so the expected
  # calling convention is visible to adaptor authors.
  my ($self, @feature_ids) = @_;
  $self->throw('_delete_features is not implemented in this adaptor');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3299
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Abstract hook for deleting groups by ID.
#
# Args    : $self plus a list of group IDs to remove.
# Returns : never returns here; adaptors that override it are documented
#           as returning a count.
# Throws  : always, via $self->throw(), because this base-class version is
#           only a placeholder for adaptor subclasses to override.
sub _delete_groups {
  # @group_ids is not used by the stub; it is named so the expected
  # calling convention is visible to adaptor authors.
  my ($self, @group_ids) = @_;
  $self->throw('_delete_groups is not implemented in this adaptor');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Abstract hook for deleting features selected by a delete specification.
#
# Args    : $self and a hashref; the accompanying POD describes the keys
#           'segments', 'types' and 'force'.
# Returns : never returns here; adaptors that override it are documented
#           as returning a count.
# Throws  : always, via $self->throw(), because this base-class version is
#           only a placeholder for adaptor subclasses to override.
sub _delete {
  # $delete_options is not used by the stub; it is named so the expected
  # calling convention is visible to adaptor authors.
  my ($self, $delete_options) = @_;
  $self->throw('_delete is not implemented in this adaptor');
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Decode a URL-style escaped string.
#
# This is a plain function, not a method: it takes the string as its only
# argument.
#
# Args    : the escaped string (may be undef).
# Returns : a decoded copy in which every '+' has become a space and every
#           %XX sequence (two hex digits) has become the character with
#           that code.  An undef argument is returned unchanged.
sub unescape {
  my $v = shift;
  # Hand undef straight back so the tr/// and s/// below do not emit
  # "uninitialized value" warnings; the result is the same undef.
  return $v unless defined $v;
  # Translate '+' first so that a literal plus written as %2B survives.
  $v =~ tr/+/ /;
  $v =~ s/%([0-9a-fA-F]{2})/chr hex($1)/ge;
  return $v;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3318
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3320 package Bio::DB::GFF::ID_Iterator;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3321 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3323 use Bio::Root::Root;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3324 use vars '@ISA';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3325 @ISA = 'Bio::Root::Root';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Construct an iterator over a list of identifiers.
#
# Args    : $db   - object used by next_seq() to fetch each identifier
#           $ids  - arrayref of identifiers to iterate over (undef is
#                   treated as an empty list)
#           $type - which lookup next_seq() performs; next_seq() recognises
#                   'name', 'feature' and 'group'
# Returns : the new iterator object, blessed into $class.
sub new {
  my $class = shift;
  my ($db,$ids,$type) = @_;
  # next_seq() consumes the id list with shift(), so keep a private shallow
  # copy rather than the caller's array; otherwise iterating would empty
  # the caller's list as a side effect.
  my @pending = @{ $ids || [] };
  return bless {ids=>\@pending,db=>$db,type=>$type},$class;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Fetch the object for the next identifier in the iterator.
#
# Removes the first pending identifier from the list and looks it up
# through the stored db object, using the method selected by the
# iterator's type:
#   'name'    -> $db->segment($name)
#   'feature' -> $db->fetch_feature_by_id($name)
#   'group'   -> $db->fetch_feature_by_gid($name)
# An identifier given as an array reference is first turned into a
# Bio::DB::GFF::Featname built from its elements.
#
# Returns : the fetched object, or nothing (empty list / undef) when the
#           list is exhausted or the next identifier is undef or ''.
# Throws  : via $self->throw() if the type is not one of the three above,
#           or if the lookup returns a false value.
sub next_seq {
  my $self = shift;

  my $next = shift @{$self->{ids}};
  # Test for defined-and-non-empty rather than plain truth, so that a
  # legitimate identifier of 0 is fetched instead of ending the iteration.
  return unless defined $next && length $next;

  my $name = ref($next) eq 'ARRAY' ? Bio::DB::GFF::Featname->new(@$next) : $next;

  # Iterator type -> name of the db method that performs the lookup.
  my %fetch_method = (
    name    => 'segment',
    feature => 'fetch_feature_by_id',
    group   => 'fetch_feature_by_gid',
  );
  my $type   = $self->{type};
  my $method = defined $type ? $fetch_method{$type} : undef;
  $self->throw("Bio::DB::GFF::ID_Iterator called to fetch an unknown type of identifier")
    unless $method;

  # Scalar assignment keeps the lookup in scalar context, as before.
  my $segment = $self->{db}->$method($name);
  $self->throw("id does not exist") unless $segment;
  return $segment;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3346 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3347
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3348 __END__
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3349
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3350 =head1 BUGS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3352 Features can only belong to a single group at a time. This must be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3353 addressed soon.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3355 Start coordinate can be greater than stop coordinate for relative
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3356 addressing. This breaks strict BioPerl compatibility and must be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3357 fixed.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3359 =head1 SEE ALSO
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3360
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3361 L<bioperl>,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3362 L<Bio::DB::GFF::RelSegment>,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3363 L<Bio::DB::GFF::Aggregator>,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3364 L<Bio::DB::GFF::Feature>,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3365 L<Bio::DB::GFF::Adaptor::dbi::mysqlopt>,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3366 L<Bio::DB::GFF::Adaptor::dbi::oracle>,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3367 L<Bio::DB::GFF::Adaptor::memory>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3368
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3369 =head1 AUTHOR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3371 Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3373 Copyright (c) 2001 Cold Spring Harbor Laboratory.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3374
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3375 This library is free software; you can redistribute it and/or modify
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3376 it under the same terms as Perl itself.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3377
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3378 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3379