comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/GFF.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 # EnsEMBL module for Bio::EnsEMBL::Funcgen::Parsers::GFF
3 #
4
5 =head1 LICENSE
6
7 Copyright (c) 1999-2011 The European Bioinformatics Institute and
8 Genome Research Limited. All rights reserved.
9
10 This software is distributed under a modified Apache license.
11 For license details, please see
12
13 http://www.ensembl.org/info/about/code_licence.html
14
15 =head1 CONTACT
16
17 Please email comments or questions to the public Ensembl
18 developers list at <ensembl-dev@ebi.ac.uk>.
19
20 Questions may also be sent to the Ensembl help desk at
21 <helpdesk@ensembl.org>.
22
23 #Could this be based on a Generic Flat file parser?
24
25 =head1 NAME
26
27 Bio::EnsEMBL::Funcgen::Parsers::GFF
28
29 =head1 SYNOPSIS
30
31 my $parser_type = "Bio::EnsEMBL::Funcgen::Parsers::GFF";
32 push @INC, $parser_type;
33 my $imp = $class->SUPER::new(@_);
34
35
36 =head1 DESCRIPTION
37
38 This is a definitions class which should not be instantiated directly; it is
39 normally set by the Importer as the parent class. GFF contains meta
40 data and methods specific to data in GFF format, to aid
41 parsing and importing of experimental data.
42
43 =cut
44
45 package Bio::EnsEMBL::Funcgen::Parsers::GFF;
46
47 use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate );
48 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
49 use strict;
50
51
# Inheritance: this parser is a specialisation of the ExperimentalSet parser.
# The parent module is not loaded here; it must already be loaded by the caller.
our @ISA = qw(Bio::EnsEMBL::Funcgen::Parsers::ExperimentalSet);
54
55 =head2 new
56
57 Example : my $self = $class->SUPER::new(@_);
58 Description: Constructor method for GFF class
59 Returntype : Bio::EnsEMBL::Funcgen::Parsers::GFF
60 Exceptions : None
61 Caller : Bio::EnsEMBL::Funcgen::Importer
62 Status : at risk
63
64 =cut
65
66
# Constructor. Builds the object through the parent class constructor and then
# records which field is used as the display label.
#
# Args    : the full argument list is forwarded unchanged to SUPER::new. The
#           only argument read here is DISPLAY_LABEL_FIELD (extracted with
#           rearrange); it is optional and defaults to 'ID'.
# Returns : the object created by SUPER::new.
sub new {
    my $caller = shift;
    my $class  = ref($caller) || $caller;

    # Design notes (kept from the original implementation):
    #
    # Define default fields here and pass them on. We also need to be able to
    # take custom attribute mappings.
    #
    # Keys are the array index of a field, values are Feature parameter names
    # (reverse this?), unless we have a compound field which we name
    # accordingly and then call e.g. parse_attrs, which will return a hash
    # with the relevant Feature attributes.
    #
    # Is splitting this up simply going to make the parse slower due to
    # accessor methods? Pass or just set directly here?
    #
    # <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
    #
    # Some of these may be highly redundant due to the nature of the data.
    # We can hash things to lessen the impact, but we are still going to be
    # checking existence for each one. This is the exhaustive GFF definition;
    # we can redefine or delete some entries dynamically to avoid ever
    # considering a particular field index.
    #
    # Don't need any of this? Can we simply define process fields? That would
    # remove the ability to define custom formats, but then again we can only
    # have a custom format if it has Ensembl-compliant data, i.e. no
    # preprocessing has to be done before populating the feature_params hash.
    #
    # my %fields = (
    #     0 => 'fetch_slice',
    #     1 => 'get_source',
    #     2 => 'get_feature_type',
    #     3 => '-start',
    #     4 => '-end',
    #     5 => '-strand',    # will most likely be '.', need to convert - . + to -1 0 1
    #     6 => 'frame',      # will most likely be '.'
    #     7 => 'get_attributes',
    # );
    #
    # We want to be able to define mappings between attributes and fields;
    # we are basically just dealing with display_label for annotated_feature,
    # e.g. -display_label_format => ID+ACC, or maybe a format of several
    # fields and attrs plus text. That needs a separator which will not be
    # used in the GFF attribute names. Forget this for now and just use one
    # field.
    #
    # We still need to define the field names as a global hash to allow this
    # display_label_field look up.

    my $self = $class->SUPER::new(@_);    #, -fields => \%fields);

    my ($display_label_field) = rearrange(['DISPLAY_LABEL_FIELD'], @_);

    # Apply the default only after extracting the argument. Assigning the
    # rearrange result over a pre-set default discarded 'ID' whenever the
    # caller did not supply DISPLAY_LABEL_FIELD.
    $display_label_field = 'ID' if !defined $display_label_field;

    # Still to do:
    #  - define a meta header method for lines starting with '##'
    #  - skip comments ('#') at the beginning or end of a line
    #  - a field header probably does not need skipping
    #
    # Define result method
    # $self->{'file_ext'} => 'gff';    # Could use vendor here?
    #
    # Define this if we want to override the generic method in Simple
    # $self->{'config'}{'results_data'} => ["and_import_gff"];

    $self->display_label_field($display_label_field);

    return $self;
}
144
145
146 =head2 set_config
147
148 Example : $self->set_config;
149 Description: Sets attribute dependent config
150 Returntype : None
151 Exceptions : None
152 Caller : Bio::EnsEMBL::Funcgen::Importer
153 Status : at risk
154
155 =cut
156
157
# Sets attribute dependent config by delegating to the parent class.
# There is no GFF specific configuration yet.
# Returns nothing (empty list / undef).
sub set_config {
    my ($self) = @_;

    # Parent class does all of the work for now.
    $self->SUPER::set_config();

    # GFF specific config would go here.

    return;
}
167
# TODO: Need to implement this!
#
# Placeholder line parser. Splits a line on tabs and captures the first five
# fields; nothing is done with them yet.
#
# Args    : $line - one line of input text
# Returns : the value of the field assignment, which is what the sub already
#           returned implicitly: the five captured fields in list context, or
#           the number of fields produced by the split in scalar context.
#
# NOTE(review): the variable names below follow a BED-like layout rather than
# the GFF column order (seqname, source, feature, start, end, score, strand,
# frame, attributes) - confirm before building the real implementation.
#
# Draft field mapping, kept for reference:
# my %fields = (
#     0 => 'fetch_slice',
#     1 => 'get_source',
#     2 => 'get_feature_type',
#     3 => '-start',
#     4 => '-end',
#     5 => '-strand',    # will most likely be '.', need to convert - . + to -1 0 1
#     6 => 'frame',      # will most likely be '.'
#     7 => 'get_attributes',
# );
sub parse_line {
    my $self = shift;
    my $line = shift;

    # TODO: skip comment and meta lines here.
    # TODO: we need to return feature_params and seq if defined?

    return my ($chr, $start, $end, $pid, $score) = split /\t/o, $line;
}
192
193
194
195 1;