Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Utils/IO/GFFSerializer.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 =pod | |
2 | |
3 =head1 LICENSE | |
4 | |
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and | |
6 Genome Research Limited. All rights reserved. | |
7 | |
8 This software is distributed under a modified Apache license. | |
9 For license details, please see | |
10 | |
11 http://www.ensembl.org/info/about/code_licence.html | |
12 | |
13 =head1 NAME | |
14 | |
15 GFFSerializer - Feature to GFF converter | |
16 | |
17 =head1 AUTHOR | |
18 | |
19 Kieron Taylor, 2011 - ktaylor@ebi.ac.uk | |
20 | |
21 =head1 SYNOPSIS | |
22 | |
23 use Bio::EnsEMBL::Utils::IO::GFFSerializer; | |
24 use Bio::EnsEMBL::Utils::BiotypeMapper; | |
25 | |
26 my $ontology_adaptor = $registry->get_adaptor( 'Multi', 'Ontology', 'OntologyTerm' ); | |
27 my $biotype_mapper = new BiotypeMapper($ontology_adaptor); | |
28 my $serializer = new GFFSerializer($biotype_mapper,$output_fh); | |
29 | |
30 my $variation_feature_adaptor = $registry->get_adaptor( $config{'species'}, 'variation', 'variationfeature' ); | |
31 $serializer->print_metadata("Variation Features:"); | |
32 my $iterator = $variation_feature_adaptor->fetch_Iterator_by_Slice($slice,undef,60000); | |
33 $serializer->print_feature_Iterator($iterator); | |
34 | |
35 =head1 DESCRIPTION | |
36 | |
37 Subclass of Serializer that can turn a feature into a line for the GFF3 format. Requires | |
38 a BiotypeMapper in order to translate biotypes to SO terms. | |
39 | |
40 =cut | |
41 | |
42 package Bio::EnsEMBL::Utils::IO::GFFSerializer; | |
43 use strict; | |
44 use warnings; | |
45 use Bio::EnsEMBL::Utils::Exception; | |
46 use Bio::EnsEMBL::Utils::BiotypeMapper; | |
47 use URI::Escape; | |
48 use Bio::EnsEMBL::Utils::IO::FeatureSerializer; | |
49 | |
50 use base qw(Bio::EnsEMBL::Utils::IO::FeatureSerializer); | |
51 | |
52 my %strand_conversion = ( '1' => '+', '0' => '?', '-1' => '-'); | |
53 | |
54 =head2 new | |
55 | |
56 Constructor | |
57 Arg [1] : Ontology Adaptor | |
58 Arg [2] : Optional File handle | |
59 | |
60 Returntype : Bio::EnsEMBL::Utils::IO::GFFSerializer | |
61 | |
62 =cut | |
63 | |
64 sub new { | |
65 my $class = shift; | |
66 my $self = { | |
67 ontology_adaptor => shift, | |
68 filehandle => shift, | |
69 }; | |
70 bless $self, $class; | |
71 if ( ref($self->{'ontology_adaptor'}) ne "Bio::EnsEMBL::DBSQL::OntologyTermAdaptor" ) { | |
72 throw("GFF format requires an instance of Bio::EnsEMBL::DBSQL::OntologyTermAdaptor to function. See also Bio::EnsEMBL::Utils::BiotypeMapper"); | |
73 } | |
74 $self->{'mapper'} = new Bio::EnsEMBL::Utils::BiotypeMapper($self->{'ontology_adaptor'}); | |
75 | |
76 if (!defined ($self->{'filehandle'})) { | |
77 # no file handle, let the handle point to a copy of STDOUT instead | |
78 open $self->{'filehandle'}, ">&STDOUT"; | |
79 $self->{'stdout'} = 1; | |
80 } | |
81 return $self; | |
82 } | |
83 | |
84 =head2 print_feature | |
85 | |
86 Arg [1] : Bio::EnsEMBL::Feature, subclass or related pseudo-feature | |
87 Example : $reporter->print_feature($feature,$slice_start_coordinate,"X") | |
88 Description: Asks a feature for its summary, and generates a GFF3 | |
89 compliant entry to hand back again | |
90 Additional attributes are handed through to column 9 of the | |
91 output using exact spelling and capitalisation of the | |
92 feature-supplied hash. | |
93 Returntype : none | |
94 =cut | |
95 | |
96 sub print_feature { | |
97 my $self = shift; | |
98 my $feature = shift; | |
99 my $biotype_mapper = $self->{'mapper'}; | |
100 | |
101 my $text_buffer = ""; | |
102 if ($feature->can('summary_as_hash') ) { | |
103 my %summary = %{$feature->summary_as_hash}; | |
104 my $row = ""; | |
105 # Column 1 - seqname, the name of the sequence/chromosome the feature is on. Landmark for start below | |
106 if (!defined($summary{'seq_region_name'})) {$summary{'seq_region_name'} = "?";} | |
107 $row .= $summary{'seq_region_name'}."\t"; | |
108 | |
109 # Column 2 - source, complicated with Ensembl not being the originator of all data | |
110 $row .= "EnsEMBL\t"; | |
111 | |
112 # Column 3 - feature, the ontology term for the kind of feature this row is | |
113 my $so_term = $biotype_mapper->translate_feature_to_SO_term($feature); | |
114 $row .= $so_term."\t"; | |
115 | |
116 # Column 4 - start, the start coordinate of the feature, here shifted to chromosomal coordinates | |
117 # Start and end must be in ascending order for GFF. Circular genomes require the length of | |
118 # the circuit to be added on. | |
119 if ($summary{'start'} > $summary{'end'}) { | |
120 #assumes this is not a Compara circular sequence and can treat is as a Feature | |
121 if ($feature->slice() && $feature->slice()->is_circular() ) { | |
122 $summary{'end'} = $summary{'end'} + $feature->seq_region_length; | |
123 } | |
124 # non-circular, but end still before start | |
125 else {$summary{'end'} = $summary{'start'};} | |
126 } | |
127 $row .= $summary{'start'} . "\t"; | |
128 | |
129 # Column 5 - end, coordinates (absolute) for the end of this feature | |
130 $row .= $summary{'end'} . "\t"; | |
131 | |
132 # Column 6 - score, for variations only. | |
133 if (exists($summary{'score'})) { | |
134 $row .= $summary{'score'}."\t"; | |
135 } | |
136 else { | |
137 $row .= ".\t"; | |
138 } | |
139 | |
140 # Column 7 - strand, up or down | |
141 if (exists($summary{'strand'})) { | |
142 $row .= $strand_conversion{$summary{'strand'}}."\t"; | |
143 } | |
144 else { | |
145 $row .= ".\t"; | |
146 } | |
147 | |
148 # Column 8 - reading frame, necessary only for Exons | |
149 $row .= ".\t"; | |
150 | |
151 # Column 9 - the 'other' section for all GFF and GVF compliant attributes | |
152 # We include Stable ID and biotype where possible to supplement the information in the other columns | |
153 delete $summary{'seq_region_start'}; | |
154 delete $summary{'seq_region_name'}; | |
155 delete $summary{'start'}; | |
156 delete $summary{'end'}; | |
157 delete $summary{'strand'}; | |
158 delete $summary{'score'}; | |
159 # Slice the hash for specific keys in GFF-friendly order | |
160 my @ordered_keys = qw(ID Name Alias Parent Target Gap Derives_from Note Dbxref Ontology_term Is_circular); | |
161 my @ordered_values = @summary{@ordered_keys}; | |
162 while (my $key = shift @ordered_keys) { | |
163 my $value = shift @ordered_values; | |
164 if ($value) { | |
165 $row .= $key."=".uri_escape($value,'\t\n\r;=%&,').";"; | |
166 } | |
167 delete $summary{$key}; | |
168 } | |
169 # Catch the remaining keys, containing whatever else the Feature provided | |
170 foreach my $attribute ( keys(%summary)) { | |
171 if (ref $summary{$attribute} eq "ARRAY") { | |
172 $row .= $attribute."=".join (',',@{$summary{$attribute}}) . ";" | |
173 } | |
174 else { | |
175 if ($summary{$attribute}) { $row .= $attribute."=".uri_escape($summary{$attribute},'\t\n\r;=%&,') . ";"; } | |
176 } | |
177 } | |
178 # trim off any trailing commas left by the ordered keys stage above: | |
179 $text_buffer .= $row."\n"; | |
180 } | |
181 else { | |
182 warning("Feature failed to self-summarise"); | |
183 } | |
184 #filehandle is inherited | |
185 my $fh = $self->{'filehandle'}; | |
186 print $fh $text_buffer; | |
187 } | |
188 | |
189 =head2 print_main_header | |
190 | |
191 Arg [1] : Arrayref of slices going into the file. | |
192 Description: Printing the header text or metadata required for GFF, | |
193 using a list of slices to be written | |
194 Returntype : None | |
195 =cut | |
196 | |
197 sub print_main_header { | |
198 my $self = shift; | |
199 my $arrayref_of_slices = shift; | |
200 my $fh = $self->{'filehandle'}; | |
201 | |
202 print $fh "##gff-version 3\n"; | |
203 foreach my $slice (@{$arrayref_of_slices}) { | |
204 if (not defined($slice)) { warning("Slice not defined.\n"); return;} | |
205 print $fh "##sequence-region ",$slice->seq_region_name," ",$slice->start," ",$slice->end,"\n"; | |
206 } | |
207 } | |
208 | |
209 sub print_metadata { | |
210 my $self = shift; | |
211 my $text = shift; | |
212 my $fh = $self->{'filehandle'}; | |
213 print $fh "\n#".$text."\n"; | |
214 } | |
215 | |
216 | |
217 1; |