Mercurial > repos > mahtabm > ensembl
diff variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/GFF.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/GFF.pm Thu Apr 11 02:01:53 2013 -0400 @@ -0,0 +1,195 @@ +# +# EnsEMBL module for Bio::EnsEMBL::Funcgen::Parsers::GFF +# + +=head1 LICENSE + + Copyright (c) 1999-2011 The European Bioinformatics Institute and + Genome Research Limited. All rights reserved. + + This software is distributed under a modified Apache license. + For license details, please see + + http://www.ensembl.org/info/about/code_licence.html + +=head1 CONTACT + + Please email comments or questions to the public Ensembl + developers list at <ensembl-dev@ebi.ac.uk>. + + Questions may also be sent to the Ensembl help desk at + <helpdesk@ensembl.org>. + +#Could this be based on a Generic Flat file parser? + +=head1 NAME + +Bio::EnsEMBL::Funcgen::Parsers::GFF + +=head1 SYNOPSIS + + my $parser_type = "Bio::EnsEMBL::Funcgen::Parsers::GFF"; + push @INC, $parser_type; + my $imp = $class->SUPER::new(@_); + + +=head1 DESCRIPTION + +This is a definitions class which should not be instantiated directly; it is +normally set by the Importer as the parent class. GFF contains meta +data and methods specific to data in GFF format, to aid +parsing and importing of experimental data. 
=head1 METHODS

=cut

package Bio::EnsEMBL::Funcgen::Parsers::GFF;

use strict;
use warnings;

use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate );
use Bio::EnsEMBL::Utils::Argument qw( rearrange );

# The parent class is not loaded here; only the inheritance link is declared.
# NOTE(review): presumably the Importer loads ExperimentalSet before this
# class is used - confirm against the caller.
our @ISA = qw(Bio::EnsEMBL::Funcgen::Parsers::ExperimentalSet);

=head2 new

  Arg [-display_label_field] : String (optional) - name of the field used
                to build the display label. Defaults to 'ID'.
  Arg [...]   : All arguments are also passed, unchanged, to the parent
                constructor.
  Example     : my $self = $class->SUPER::new(@_);
  Description : Constructor method for GFF class
  Returntype  : Bio::EnsEMBL::Funcgen::Parsers::GFF
  Exceptions  : None
  Caller      : Bio::EnsEMBL::Funcgen::Importer
  Status      : at risk

=cut

sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  # Design notes (nothing below is implemented yet):
  #
  # Define default fields here and pass them on. We also need to be able to
  # take custom attribute mappings.
  #
  # Keys are the array index of the field, values are Feature parameter
  # names. Reverse this? Unless we have a compound field which we name
  # accordingly and then call e.g. parse_attrs, which will return a hash with
  # the relevant Feature attributes.
  #
  # Is splitting this up simply going to make the parse slower due to
  # accessor methods? Pass, or just set directly here?
  #
  # GFF columns:
  # <seqname> <source> <feature> <start> <end> <score> <strand> <frame> [attributes] [comments]
  #
  # Some of these may be highly redundant due to the nature of the data.
  # We can hash things to lessen the impact, but we would still be checking
  # 'exists' for each one. This is the exhaustive GFF definition, so we can
  # redefine or delete some entries dynamically to avoid ever considering a
  # particular field index.
  #
  # Do we need any of this? Can we simply define process fields? That would
  # remove the ability to define custom formats, but a custom format is only
  # possible if it has ensembl compliant data, i.e. no preprocessing has to
  # be done before populating the feature_params hash.
  #
  # my %fields = (
  #   0 => 'fetch_slice',
  #   1 => 'get_source',
  #   2 => 'get_feature_type',
  #   3 => '-start',
  #   4 => '-end',
  #   5 => '-strand',   # Will most likely be -.+, need to convert to -1 0 1
  #   6 => 'frame',     # Will most likely be .
  #   7 => 'get_attributes',
  # );
  #
  # We want to be able to define mappings between attributes and fields.
  # We are basically just dealing with display_label for annotated_feature,
  # e.g. -display_label_format => ID+ACC, or maybe a format of several fields
  # and attrs plus text. That needs a separator which will not be used in the
  # GFF attr names. Forget this for now and just use one field.
  #
  # We still need to define the field names as a global hash to allow the
  # display_label_field look up.

  my $self = $class->SUPER::new(@_);    #, -fields => \%fields);

  # Only fall back to 'ID' when the caller did not name a field. Assigning
  # the rearrange() result unconditionally would replace the default with
  # undef whenever -display_label_field is omitted.
  my ($display_label_field) = rearrange(['DISPLAY_LABEL_FIELD'], @_);
  $display_label_field = 'ID' if !defined $display_label_field;

  # TODO: define a meta header method for lines starting with '##', and skip
  # comments ('#') at the beginning or end of a line. Field headers should
  # not need skipping.
  #
  # Define result method:
  #   $self->{'file_ext'} => 'gff';    # Could use vendor here?
  #
  # Define this if we want to override the generic method in Simple:
  #   $self->{'config'}{'results_data'} => ["and_import_gff"];

  # display_label_field() is not defined in this file.
  # NOTE(review): presumably an accessor inherited from the parent - confirm.
  $self->display_label_field($display_label_field);

  return $self;
}


=head2 set_config

  Example     : $self->set_config;
  Description : Sets attribute dependent config
  Returntype  : None
  Exceptions  : None
  Caller      : Bio::EnsEMBL::Funcgen::Importer
  Status      : at risk

=cut

sub set_config {
  my $self = shift;

  $self->SUPER::set_config;

  # GFF specific stuff here.

  return;
}


# Unimplemented stub: splits a tab-delimited line but does not yet build
# feature parameters.
#
#   $line : one line of input, expected to be tab-delimited.
#
# TODO: skip meta headers ('##') and comment ('#') lines before splitting.
# TODO: return feature_params (and seq if defined) once implemented.
#
# See the commented %fields mapping in new() for the intended column to
# Feature parameter mapping.
sub parse_line {
  my ($self, $line) = @_;

  # NOTE(review): these names do not match the GFF column order
  # (<seqname> <source> <feature> <start> <end> <score> <strand> <frame>
  # [attributes]) - they look BED-like. Confirm the intended layout before
  # implementing.
  #
  # This list assignment is the last statement evaluated, so its result is
  # what the sub currently returns. It is kept as-is until the real return
  # contract is defined.
  my ($chr, $start, $end, $pid, $score) = split /\t/, $line;
}


1;
