annotate variant_effect_predictor/Bio/EnsEMBL/Utils/IO/GFFParser.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 GFFParser - simple gff3 parser.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 =head1 AUTHOR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 Monika Komorowska, 2012 - monika@ebi.ac.uk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 use Bio::EnsEMBL::Utils::IO::GFFParser;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 use IO::File;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 my $file_name = "features.gff";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 my $fh = IO::File->new($file_name, 'r');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 my $parser = Bio::EnsEMBL::Utils::IO::GFFParser->new($fh);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 my @header_lines = @{$parser->parse_header()};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 #do something with the header lines array, e.g. print array elements
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 foreach my $header_line (@header_lines) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 print $header_line . "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 print "\n\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 my $feature = $parser->parse_next_feature();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 while (defined($feature) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 my %feature = %{$feature};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 #do something with the feature, e.g. print hash keys and values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 foreach my $key (keys %feature) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 if ($key ne 'attribute') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 print $key . " " . $feature{$key} ."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 print $key . "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 my %attribs = %{$feature{$key}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 foreach my $attrib_key (keys %attribs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 printf("\t%s %s\n", $attrib_key, join(q{, }, @{wrap_array($attribs{$attrib_key})}));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 print "\n\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 $feature = $parser->parse_next_feature();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 my $sequence = $parser->parse_next_sequence();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 while (defined($sequence)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 my %sequence = %{$sequence};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 foreach my $key (keys %sequence) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 print $key . " " . $sequence{$key} ."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 print "\n\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 $sequence = $parser->parse_next_sequence();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 $parser->close();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 $fh->close();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 GFF3 format as defined in http://www.sequenceontology.org/gff3.shtml
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 Use parse_header method to parse a GFF3 file header, and parse_next_feature to parse the next feature line in the file.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 This class can be extended to convert a feature hash into a feature object reversing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 the processing done by GFFSerializer.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 package Bio::EnsEMBL::Utils::IO::GFFParser;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 use Bio::EnsEMBL::Utils::Exception;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 use IO::File;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 use URI::Escape;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 use Bio::EnsEMBL::Utils::Scalar qw/wrap_array/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Lookup table used when parsing the strand column of a feature line: it
# translates the GFF3 strand symbol into a numeric strand string.
# Only '+', '?' and '-' have entries; any other symbol (including '.') has
# no entry, so looking it up yields undef.
my %strand_conversion = (
  '-' => '-1',
  '?' => '0',
  '+' => '1',
);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 new

    Constructor
    Arg [1]    : File handle to read the GFF3 data from
    Description: Stores the supplied file handle in a new parser object.
    Returntype : Bio::EnsEMBL::Utils::IO::GFFParser
    Exceptions : Calls throw() when no file handle (an undefined value) is
                 supplied

=cut

sub new {
  my ($class, $filehandle) = @_;

  my $self = bless { filehandle => $filehandle }, $class;

  # Only definedness is verified here; the value is not checked to be a
  # readable handle.
  throw("GFFParser requires a valid filehandle to a GFF3 formatted file")
    unless defined $self->{'filehandle'};

  return $self;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 parse_header

    Arg [1]    : None
    Description: Reads lines for as long as they begin with '#' or with
                 whitespace, and returns an arrayref holding each line that
                 begins with '##' (one line per array element). Lines in that
                 run which do not begin with '##' are discarded. Reading
                 stops early at a line containing the ##FASTA directive.
                 The line that ended the header (if any) is kept so that the
                 next read returns it again.
                 Issues a warning when a gff-version directive names a
                 version other than 3.
    Returntype : Arrayref of GFF3 file header lines

=cut

sub parse_header {

  my $self = shift;

  my @header_lines;
  my $next_line;

  # NOTE: the character class is deliberately [\#\s] and not [\#|\s]. Inside
  # a character class '|' is a literal pipe, not alternation, so the old
  # pattern swallowed a first feature line whose seqid starts with '|'.
  while (defined($next_line = $self->_read_line()) && $next_line =~ /^[\#\s]/) {

    # The ##FASTA directive marks the start of the sequence section
    last if $next_line =~ /\#\#FASTA/;

    # Header lines start with ##; anything else in this run is skipped
    next unless $next_line =~ /^\#{2}/;

    push @header_lines, $next_line;

    if (my ($version) = $next_line =~ /gff-version\s+(\d+)/) {
      if ($version != 3) {
        warning("File has been formatted in GFF version $version. GFFParser may return unexpected results as it is designed to parse GFF3 formatted files.");
      }
    }
  }

  # Hand the line that terminated the header back to _read_line, which
  # returns this stored line before reading further from the file handle.
  if (defined($next_line)) {
    $self->{'first_non_header_line'} = $next_line;
  }

  return \@header_lines;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 parse_next_feature

    Arg [1]    : None
    Description: Skips comment lines (leading '#'), blank lines and lines
                 starting with '//', then parses the next line as a
                 tab-separated feature. Returns a hashref in the format -
                 {
                   seqid     => scalar,
                   source    => scalar,
                   type      => scalar,
                   start     => scalar,
                   end       => scalar,
                   score     => scalar,
                   strand    => scalar ('1', '0' or '-1'; undef when the
                                strand symbol has no mapping),
                   phase     => scalar,
                   attribute => hashref (only present when the line has a
                                ninth column); a value is a scalar, or an
                                arrayref when it is a comma-separated list
                 }
                 Returns undef when no feature line is left or when a line
                 containing the ##FASTA directive is reached.
    Returntype : Hashref of a GFF3 feature line, or undef

=cut

sub parse_next_feature {

  my $self = shift;

  my $feature_line;

  while (defined(my $next_line = $self->_read_line())) {

    # Stop parsing features if ##FASTA directive
    last if $next_line =~ /\#\#FASTA/;

    next if ($next_line =~ /^\#/ || $next_line =~ /^\s*$/ ||
             $next_line =~ /^\/\//);

    $feature_line = $next_line;
    last;
  }

  # Explicit undef (rather than a bare return) is kept on purpose so that
  # callers evaluating this method in list context still get one element.
  return undef unless $feature_line;

  # Strip off trailing comments: everything from the first '#' onwards
  $feature_line =~ s/\#.*//;

  my @chunks = split(/\t/, $feature_line);

  # Guard the lookup so that a short line without a strand column does not
  # use an undefined value as a hash key.
  my $strand = defined $chunks[6] ? $strand_conversion{$chunks[6]} : undef;

  my %feature = (
    'seqid'  => uri_unescape($chunks[0]),
    'source' => uri_unescape($chunks[1]),
    'type'   => uri_unescape($chunks[2]),
    'start'  => $chunks[3],
    'end'    => $chunks[4],
    'score'  => $chunks[5],
    'strand' => $strand,
    'phase'  => $chunks[7]
  );

  if ($chunks[8]) {
    my %attributes;

    foreach my $attribute (split(/;/, $chunks[8])) {

      # Split on the first '=' only, so that a value which itself contains
      # an '=' is kept whole instead of being cut at the second '='.
      my ($name, $value) = split(/=/, $attribute, 2);

      # Skip empty fragments (e.g. produced by ';;' or a leading ';'), which
      # would otherwise be stored under an empty key.
      next unless defined $name && length $name;

      $name = uri_unescape($name);

      # A tag without a value is stored with an undefined value
      my @split_values;
      if (defined $value) {
        @split_values = map { uri_unescape($_) } split(/,/, $value);
      }

      if (scalar(@split_values) > 1) {
        $attributes{$name} = \@split_values;
      }
      else {
        $attributes{$name} = $split_values[0];
      }
    }

    $feature{'attribute'} = \%attributes;
  }

  return \%feature;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 parse_next_sequence

    Arg [1]    : None
    Description: Collects the next FASTA record. Lines starting with '#' or
                 '//' and blank lines are ignored. The first line starting
                 with '>' becomes the header; every following line that does
                 not start with '>' is appended to the sequence. A second
                 '>' line ends the record and is remembered so that the next
                 read returns it again. Returns a hashref in the format -
                 {
                   header   => scalar,
                   sequence => scalar,
                 }
                 Returns undef when neither a header nor any sequence was
                 collected.
    Returntype : Hashref describing one sequence record, or undef

=cut

sub parse_next_sequence {

  my ($self) = @_;

  my $fasta_header;
  my $residues;

  # The loop runs while the line read is a true value, not merely defined.
  LINE:
  while (my $current = $self->_read_line()) {

    next LINE if $current =~ /^\#/;
    next LINE if $current =~ /^\s*$/;
    next LINE if $current =~ /^\/\//;

    # Anything that is not a '>' line is sequence data
    unless ($current =~ /^>/) {
      $residues .= $current;
      next LINE;
    }

    # First '>' line seen: this is the header of the current record
    if (!$fasta_header) {
      $fasta_header = $current;
      next LINE;
    }

    # A further '>' line belongs to the following record; store it so that
    # _read_line returns it again on the next call.
    $self->{'next_fasta_header'} = $current;
    last LINE;
  }

  return undef unless ($residues || $fasta_header);

  my %record = (
    header   => $fasta_header,
    sequence => $residues,
  );

  return \%record;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Internal helper: returns the next line to be parsed, or undef once the
# file handle is exhausted.
#
# A line stored under 'first_non_header_line' or, failing that,
# 'next_fasta_header' is returned first (and the slot is reset to undef).
# Otherwise lines are read from the file handle, with the trailing newline
# removed, until one with a true value is found.
sub _read_line {

  my $self = shift;

  # Stored lines take priority over the file handle, in this order
  foreach my $slot ('first_non_header_line', 'next_fasta_header') {
    if (defined($self->{$slot})) {
      my $pending = $self->{$slot};
      $self->{$slot} = undef;
      return $pending;
    }
  }

  my $fh = $self->{'filehandle'};
  my $line;

  # Skip empty lines with a loop rather than by recursion: a long run of
  # blank lines previously cost one stack frame per line and triggered
  # "Deep recursion" warnings.
  # The skip test stays a truth test (so a line holding only "0" is skipped
  # as well) because the parse_* methods loop on the truthiness of the
  # returned line; handing them "0" would end their parsing early.
  while (defined($line = <$fh>)) {
    chomp $line;
    last if $line;
  }

  return $line;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 close

    Arg [1]    : None
    Description: Resets the parser's stored file handle to undef. The handle
                 itself is not explicitly closed here; the code that opened
                 it remains responsible for closing it.
    Returntype : undef

=cut

sub close {
  my ($self) = @_;

  # The value of the assignment (undef) is what the method returns
  return $self->{'filehandle'} = undef;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 1;