annotate variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/GFF.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 # EnsEMBL module for Bio::EnsEMBL::Funcgen::Parsers::GFF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 Copyright (c) 1999-2011 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 developers list at <ensembl-dev@ebi.ac.uk>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 #Could this be based on a Generic Flat file parser?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 Bio::EnsEMBL::Funcgen::Parsers::GFF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 my $parser_type = "Bio::EnsEMBL::Funcgen::Parsers::GFF";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 push @INC, $parser_type;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 my $imp = $class->SUPER::new(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 This is a definitions class which should not be instantiated directly, it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 is normally set by the Importer as the parent class. GFF contains meta
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 data and methods specific to data in GFF format, to aid
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 parsing and importing of experimental data.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 package Bio::EnsEMBL::Funcgen::Parsers::GFF;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 @ISA = qw(Bio::EnsEMBL::Funcgen::Parsers::ExperimentalSet);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 Example : my $self = $class->SUPER::new(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 Description: Constructor method for GFF class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 Returntype : Bio::EnsEMBL::Funcgen::Parsers::GFF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 Caller : Bio::EnsEMBL::Funcgen::Importer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 Status : at risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub new {
  # Constructor for the GFF parser.
  #
  # Args:
  #   All arguments are forwarded unchanged to the parent constructor.
  #   -DISPLAY_LABEL_FIELD (optional): name of the field used to build the
  #     feature display label. Defaults to 'ID' when not supplied.
  # Returns:
  #   The object created by SUPER::new, after display_label_field() has been
  #   called on it with the chosen field name.
  #
  # NOTE(review): display_label_field() is not defined in this file;
  # presumably it is provided by a parent class - confirm.
  my ($caller, @args) = @_;
  my $class = ref($caller) || $caller;

  # Standard GFF column layout, kept here for reference:
  # <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
  #
  # Open design questions carried over from the original implementation:
  #  - Should a field-index => Feature-parameter-name mapping (and custom
  #    attribute mappings) be defined here and passed to the parent, e.g.
  #    via -fields? It is currently not passed.
  #  - A richer display label format (e.g. ID+ACC, or several fields and
  #    attributes plus text) was considered; for now a single field is used.
  #  - TODO: define a meta header method for lines starting with '##' and
  #    skip '#' comments at the beginning or end of a line.
  #  - TODO: $self->{'file_ext'} could be set to 'gff', and
  #    $self->{'config'}{'results_data'} could be set to override the
  #    generic import method.

  my $self = $class->SUPER::new(@args);

  # rearrange() yields undef for an argument that was not passed, so the
  # default must be applied after the lookup rather than before it.
  # Previously the 'ID' default was overwritten by that undef.
  my ($display_label_field) = rearrange(['DISPLAY_LABEL_FIELD'], @args);
  $display_label_field = 'ID' if ! defined $display_label_field;

  $self->display_label_field($display_label_field);

  return $self;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 =head2 set_config
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 Example : $self->set_config;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 Description: Sets attribute dependent config
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 Returntype : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 Caller : Bio::EnsEMBL::Funcgen::Importer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 Status : at risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub set_config {
  # Applies attribute-dependent configuration by delegating to the parent
  # class implementation. No GFF-specific configuration is set yet.
  # Returns nothing (bare return).
  my ($self) = @_;

  $self->SUPER::set_config();

  #GFF specific stuff here.

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 #Need to implement this!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub parse_line {
  # Unimplemented stub: splits one tab-delimited line into its leading
  # columns and does nothing further with them.
  #
  # Args:
  #   $line - a single tab-separated record.
  # Returns:
  #   There is no explicit return; the value of the final list assignment
  #   is what callers receive (the five assigned variables in list context,
  #   the number of fields produced by split in scalar context).
  #
  # TODO: return the feature parameters (and sequence, if defined) once the
  # parsing is implemented, and skip comment/meta lines.
  #
  # NOTE(review): the variable names below follow a BED-like column order
  # (chr, start, end, id, score), whereas standard GFF columns are
  #   <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes]
  # - confirm the intended layout before implementing.
  my $self = shift;
  my $line = shift;

  # Must remain the last statement: its value is the implicit return value.
  my ($chr, $start, $end, $pid, $score) = split(/\t/o, $line);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 1;