Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/GFF.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 # | |
2 # EnsEMBL module for Bio::EnsEMBL::Funcgen::Parsers::GFF | |
3 # | |
4 | |
5 =head1 LICENSE | |
6 | |
7 Copyright (c) 1999-2011 The European Bioinformatics Institute and | |
8 Genome Research Limited. All rights reserved. | |
9 | |
10 This software is distributed under a modified Apache license. | |
11 For license details, please see | |
12 | |
13 http://www.ensembl.org/info/about/code_licence.html | |
14 | |
15 =head1 CONTACT | |
16 | |
17 Please email comments or questions to the public Ensembl | |
18 developers list at <ensembl-dev@ebi.ac.uk>. | |
19 | |
20 Questions may also be sent to the Ensembl help desk at | |
21 <helpdesk@ensembl.org>. | |
22 | |
23 #Could this be based on a Generic Flat file parser? | |
24 | |
25 =head1 NAME | |
26 | |
27 Bio::EnsEMBL::Funcgen::Parsers::GFF | |
28 | |
29 =head1 SYNOPSIS | |
30 | |
31 my $parser_type = "Bio::EnsEMBL::Funcgen::Parsers::GFF"; | |
32 push @INC, $parser_type; | |
33 my $imp = $class->SUPER::new(@_); | |
34 | |
35 | |
36 =head1 DESCRIPTION | |
37 | |
38 This is a definitions class which should not be instantiated directly; it is | |
39 normally set by the Importer as the parent class. GFF contains meta | |
40 data and methods specific to data in GFF format, to aid | |
41 parsing and importing of experimental data. | |
42 | |
43 =cut | |
44 | |
45 package Bio::EnsEMBL::Funcgen::Parsers::GFF; | |
46 | |
47 use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate ); | |
48 use Bio::EnsEMBL::Utils::Argument qw( rearrange ); | |
49 use strict; | |
50 | |
51 | |
52 use vars qw(@ISA); | |
53 @ISA = qw(Bio::EnsEMBL::Funcgen::Parsers::ExperimentalSet); | |
54 | |
55 =head2 new | |
56 | |
57 Example : my $self = $class->SUPER::new(@_); | |
58 Description: Constructor method for GFF class | |
59 Returntype : Bio::EnsEMBL::Funcgen::Parsers::GFF | |
60 Exceptions : None | |
61 Caller : Bio::EnsEMBL::Funcgen::Importer | |
62 Status : at risk | |
63 | |
64 =cut | |
65 | |
66 | |
# Constructor. Builds the object via the parent class constructor, then
# records which GFF field/attribute is used as the display label.
#
# Args   : passed through unchanged to SUPER::new; additionally reads
#          -display_label_field (optional, defaults to 'ID').
# Returns: the object built by SUPER::new, after display_label_field is set.
sub new{
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  #define default fields here and pass
  #We also need to be able to take custom attrs mappings

  #keys are array index of field, values are Feature parameter names
  #reverse this?
  #Unless we have a compound field which we name accordingly
  #And then call e.g. parse_attrs
  #Which will return a hash with the relevant Feature attributes

  #Is splitting this up simply going to make the parse slower due to accessor methods?

  #Pass or just set directly here?
  #<seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]

  #Some of these may be highly redundant due to the nature of the data.
  #We can hash things to lessen the impact but we're still going to be checking if exists for each one
  #No way around this? Unless it is marginally faster to set a permanent type and then only check a boolean.
  #Yes there is, this is the exhaustive GFF definition, we can just redefine or delete some entries dynamically to
  #avoid ever considering a particular field index.

  #Don't need any of this? Can we simply define process fields?
  #This will remove the ability to define custom formats
  #But then again we can only have custom format if it has ensembl compliant data
  #i.e. no preprocessing has to be done before populating the feature_params hash

  #my %fields = (
  #              0 => 'fetch_slice',
  #              1 => 'get_source',
  #              2 => 'get_feature_type',
  #              3 => '-start',
  #              4 => '-end',
  #              5 => '-strand',#Will most likely be , need to convert to -.+ > -1 0 1
  #              #6 => 'frame',#will most likely be .
  #              7 => 'get_attributes',
  #             );

  #We want to be able to define mappings between attributes and fields
  #we're basically just dealing with display_label for annotated_feature
  #e.g -display_label_format => ID+ACC
  #Or maybe format of several fields and attrs + text?
  #We need a separator which will not be used in the GFF attr names
  #we also need to be able to differentiate
  #First check standard GFF field, then check attrs
  ##No no no, just have method, generate display label
  #forget this for now and just use one field

  #We still need to define the field name here as a global hash to allow this display_label_field look up.

  my $self = $class->SUPER::new(@_);#, -fields => \%fields);

  #Parse the argument first and only then apply the default. Assigning the
  #default before the rearrange call let the list assignment overwrite it
  #(with undef whenever -display_label_field was not supplied).
  my ($display_label_field) = rearrange(['DISPLAY_LABEL_FIELD'], @_);
  $display_label_field = 'ID' if ! defined $display_label_field;#default

  #We need to define meta header method, starting with '##'
  #Also need to skip comments '#' at beginning or end of line
  #Do we also need to skip field header? No methinks not.

  #Define result method
  # $self->{'file_ext'} => 'gff';#Could use vendor here?

  #define this if we want to override the generic method in Simple
  #$self->{'config'}{'results_data'} => ["and_import_gff"];

  #display_label_field is not defined in this file; presumably an inherited
  #accessor - verify against the parent class.
  $self->display_label_field($display_label_field);

  return $self;
}
144 | |
145 | |
146 =head2 set_config | |
147 | |
148 Example : $self->set_config; | |
149 Description: Sets attribute dependent config | |
150 Returntype : None | |
151 Exceptions : None | |
152 Caller : Bio::EnsEMBL::Funcgen::Importer | |
153 Status : at risk | |
154 | |
155 =cut | |
156 | |
157 | |
# Sets attribute dependent config. Currently this only delegates to the
# parent class implementation; nothing GFF specific is configured yet.
# Returns nothing (bare return).
sub set_config{
  my ($self) = @_;

  #Parent class config first
  $self->SUPER::set_config();

  #GFF specific stuff would go here.

  return;
}
167 | |
168 #Need to implement this! | |
# Stub parser for a single input line: splits the line on tabs into five
# local fields and does nothing further with them yet.
#
# Args   : $line - one tab-delimited line of input.
# Returns: no explicit return; the value of the final list assignment is
#          what callers implicitly receive.
#
# NOTE(review): the local names look like BED-style columns, whereas GFF
# lines are normally <seqname> <source> <feature> <start> <end> <score>
# <strand> <frame> [attributes] - confirm before implementing.
sub parse_line{
  my $self = shift;
  my $line = shift;

  #return if $line ~=

  #my %fields = (
  #              0 => 'fetch_slice',
  #              1 => 'get_source',
  #              2 => 'get_feature_type',
  #              3 => '-start',
  #              4 => '-end',
  #              5 => '-strand',#Will most likely be , need to convert to -.+ > -1 0 1
  #              #6 => 'frame',#will most likely be .
  #              7 => 'get_attributes',
  #             );

  #we need to return feature_params and seq if defined?

  #Must remain the last statement: it provides the implicit return value.
  my ($chr, $start, $end, $pid, $score) = split(/\t/, $line);
}
192 | |
193 | |
194 | |
195 1; |