annotate variant_effect_predictor/Bio/EnsEMBL/Utils/SeqDumper.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 =head1 LICENSE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 This software is distributed under a modified Apache license.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 For license details, please see
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 =head1 CONTACT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 <helpdesk@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 Bio::EnsEMBL::Utils::SeqDumper
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 $seq_dumper = Bio::EnsEMBL::Utils::SeqDumper->new();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 # don't dump snps or repeats
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 $seq_dumper->disable_feature_type('repeat');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 $seq_dumper->disable_feature_type('variation');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 # dump EMBL format to STDOUT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 $seq_dumper->dump( $slice, 'EMBL' );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36 # dump GENBANK format to a file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 $seq_dumper->dump( $slice, 'GENBANK', 'out.genbank' );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 # dump FASTA format to a file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40 $seq_dumper->dump( $slice, 'FASTA', 'out.fasta' );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44 A relatively simple and lite-weight flat file dumper for Ensembl slices.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 The memory efficiency could be improved and this is currently not very
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 good for dumping very large sequences such as whole chromosomes.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 =head1 METHODS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 package Bio::EnsEMBL::Utils::SeqDumper;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 use IO::File;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58 use vars qw(@ISA);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 #keys must be uppercase
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 my $DUMP_HANDLERS =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64 { 'FASTA' => \&dump_fasta,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65 'EMBL' => \&dump_embl,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66 'GENBANK' => \&dump_genbank };
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68 my @COMMENTS =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 ('This sequence was annotated by the Ensembl system. Please visit ' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70 'the Ensembl web site, http://www.ensembl.org/ for more information.',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72 'All feature locations are relative to the first (5\') base ' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 'of the sequence in this file. The sequence presented is '.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74 'always the forward strand of the assembly. Features ' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 'that lie outside of the sequence contained in this file ' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76 'have clonal location coordinates in the format: ' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 '<clone accession>.<version>:<start>..<end>',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 'The /gene indicates a unique id for a gene, /note="transcript_id=..."' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80 ' a unique id for a transcript, /protein_id a unique id for a peptide ' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 'and note="exon_id=..." a unique id for an exon. These ids are ' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82 'maintained wherever possible between versions.',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84 'All the exons and transcripts in Ensembl are confirmed by ' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85 'similarity to either protein or cDNA sequences.');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 =head2 new
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 Arg [1] : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91 Example : $seq_dumper = Bio::EnsEMBL::Utils::SeqDumper->new;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92 Description: Creates a new SeqDumper
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93 Returntype : Bio::EnsEMBL::Utils::SeqDumper
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 sub new {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100 my ($caller, $slice, $params) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102 my $class = ref($caller) || $caller;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104 my $feature_types = {'gene' => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105 'genscan' => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106 'repeat' => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107 'similarity' => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108 'variation' => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 'contig' => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110 'marker' => 1,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111 'estgene' => 0,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112 'vegagene' => 0};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114 my $self = bless {'feature_types' => $feature_types}, $class;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116 foreach my $p (sort keys %{$params || {}}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
117 $self->{$p} = $params->{$p};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
118 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
119
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
120 return $self;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
121 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
122
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
123
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
124
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
125 =head2 enable_feature_type
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
126
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
127 Arg [1] : string $type
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
128 Example : $seq_dumper->enable_feature_type('similarity');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
129 Description: Enables the dumping of a specific type of feature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
130 Returntype : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131 Exceptions : warn if invalid feature type is passed,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132 thrown if no feature type is passed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
136
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
137 sub enable_feature_type {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
138 my ($self, $type) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
139
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
140 $type || throw("type arg is required");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
141
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
142 if(exists($self->{'feature_types'}->{$type})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143 $self->{'feature_types'}->{$type} = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145 warning("unknown feature type '$type'\n" .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146 "valid types are: " . join(',', keys %{$self->{'feature_types'}}));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152 =head2 attach_database
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154 Arg [1] : string name
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155 Arg [2] : Bio::EnsEMBL::DBSQL::DBAdaptor
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156 Example : $seq_dumper->attach_database('estgene', $estgene_db);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157 Description: Attaches a database to the seqdumper that can be used to
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158 dump data which is external to the ensembl core database.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159 Currently this is necessary to dump est genes and vega genes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160 Returntype : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 Exceptions : thrown if incorrect argument is supplied
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166 sub attach_database {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
167 my ($self, $name, $db) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
168
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
169 $name || throw("name arg is required");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
170 unless($db && ref($db) && $db->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
171 throw("db arg must be a Bio::EnsEMBL::DBSQL::DBAdaptor not a [$db]");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
172 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
173
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
174 $self->{'attached_dbs'}->{$name} = $db;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
175 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
176
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
177
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
178
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
179 =head2 get_database
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
180
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
181 Arg [1] : string $name
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
182 Example : $db = $seq_dumper->get_database('vega');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
183 Description: Retrieves a database that has been attached to the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
184 seqdumper via the attach database call.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
185 Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186 Exceptions : thrown if incorrect argument is supplied
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187 Caller : dump_feature_table
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191 sub get_database {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192 my ($self, $name) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
193
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194 $name || throw("name arg is required");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
195
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196 return $self->{'attached_dbs'}->{$name};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
197 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
198
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
199
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
200
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
201 =head2 remove_database
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
202
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203 Arg [1] : string $name
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
204 Example : $db = $seq_dumper->remove_database('estgene');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
205 Description: Removes a database that has been attached to the seqdumper
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
206 via the attach database call. The database that is removed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
207 is returned (or undef if it did not exist).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
208 Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
209 Exceptions : thrown if incorrect argument is supplied
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
210 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
211
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
212 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
213
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
214 sub remove_database {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
215 my ($self, $name) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
216
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
217 $name || throw("name arg is required");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
218
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
219 if(exists $self->{'attached_dbs'}->{$name}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
220 return delete $self->{'attached_dbs'}->{$name};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
221 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
222
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
223 return undef;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
224 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
225
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
226
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
227 =head2 disable_feature_type
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
228
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
229 Arg [1] : string $type
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
230 Example : $seq_dumper->disable_feature_type('genes');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
231 Description: Disables the dumping of a specific type of feature
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
232 Returntype : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233 Exceptions : warn if an invalid feature type is passed,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234 thrown if no feature type is passed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239 sub disable_feature_type {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240 my ($self, $type) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
242 $type || throw("type arg is required");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
243
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
244 if(exists($self->{'feature_types'}->{$type})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
245 $self->{'feature_types'}->{$type} = 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
246 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
247 warning("unknown feature type '$type'\n" .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
248 "valid types are: " . join(',', keys %{$self->{'feature_types'}}));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
249 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
250 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
251
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
252
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
253
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
254 =head2 is_enabled
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
255
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
256 Arg [1] : string $type
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
257 Example : do_something() if($seq_dumper->is_enabled('gene'));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
258 Description: checks if a specific feature type is enabled
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
259 Returntype : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
260 Exceptions : warning if invalid type is passed,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
261 thrown if no type is passed
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
262 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
263
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
264 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
265
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
266 sub is_enabled {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
267 my ($self, $type) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
268
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
269 $type || throw("type arg is required");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
270
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
271 if(exists($self->{'feature_types'}->{$type})) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
272 return $self->{'feature_types'}->{$type};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
273 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
274 warning("unknown feature type '$type'\n" .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
275 "valid types are: " . join(',', keys %{$self->{'feature_types'}}));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
276 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
277 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
278
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
279
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
280 =head2 dump
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
281
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
282 Arg [1] : Bio::EnsEMBL::Slice slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
283 The slice to dump
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
284 Arg [2] : string $format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
285 The name of the format to dump
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
286 Arg [3] : (optional) $outfile
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
287 The name of the file to dump to. If no file is specified STDOUT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
288 is used
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
289 Arg [4] : (optional) $seq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
290 Sequence to dump
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
291 Arg [4] : (optional) $no_append
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
292 Default action is to open the file in append mode. This will
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
293 turn that mode off
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
294 Example : $seq_dumper->dump($slice, 'EMBL');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
295 Description: Dumps a region of a genome specified by the slice argument into
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
296 an outfile of the format $format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
297 Returntype : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
298 Exceptions : thrown if slice or format args are not supplied
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
299 Caller : general
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
300
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
301 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
302
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
303
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
304 sub dump {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
305 my ($self, $slice, $format, $outfile, $seq, $no_append) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
306
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
307 $format || throw("format arg is required");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308 $slice || throw("slice arg is required");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
309
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
310 my $dump_handler = $DUMP_HANDLERS->{uc($format)};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
312 unless($dump_handler) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
313 throw("No dump handler is defined for format $format\n");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
314 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
315
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
316
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
317 my $FH = IO::File->new;;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
318 if($outfile) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
319 my $mode = ($no_append) ? '>' : '>>';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
320 $FH->open("${mode}${outfile}") or throw("Could not open file $outfile: $!");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
321 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
322 $FH = \*STDOUT;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
323 #mod_perl did not like the following
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
324 #$FH->fdopen(fileno(STDOUT), "w")
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
325 # or throw("Could not open currently selected output filehandle " .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
326 # "for writing");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
327 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
328
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
329 &$dump_handler($self, $slice, $FH, $seq);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331 $FH->close if ($outfile); #close if we were writing to a file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 =head2 dump_embl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336 Arg [1] : Bio::EnsEMBL::Slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337 Arg [2] : IO::File $FH
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338 Arg [3] : optional sequence string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339 Example : $seq_dumper->dump_embl($slice, $FH);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340 Description: Dumps an EMBL flat file to an open file handle
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341 Returntype : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343 Caller : dump
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
345 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
346
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347 sub dump_embl {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
349 my $slice = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
350 my $FH = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351 my $SEQ = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353 my $len = $slice->length;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
355 my $version;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356 my $acc;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
358 my $cs = $slice->coord_system();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
359 my $name_str = $cs->name() . ' ' . $slice->seq_region_name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360 $name_str .= ' ' . $cs->version if($cs->version);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
361
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
362 my $start = $slice->start;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
363 my $end = $slice->end;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
364
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
365 #determine if this slice is the entire seq region
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366 #if it is then we just use the name as the id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
367 my $slice_adaptor = $slice->adaptor();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
368 my $full_slice =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
369 $slice->adaptor->fetch_by_region($cs->name,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
370 $slice->seq_region_name,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
371 undef,undef,undef,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
372 $cs->version);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
373
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
374
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
375 my $entry_name = $slice->seq_region_name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
376
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
377
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
378
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
379 if($full_slice->name eq $slice->name) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
380 $name_str .= ' full sequence';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
381 $acc = $slice->seq_region_name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
382 my @acc_ver = split(/\./, $acc);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
383 if(@acc_ver == 2) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
384 $acc = $acc_ver[0];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
385 $version = $acc_ver[0] . '.'. $acc_ver[1];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
386 } elsif(@acc_ver == 1 && $cs->version()) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
387 $version = $acc . '.'. $cs->version();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
388 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
389 $version = $acc;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
390 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
391 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
392 $name_str .= ' partial sequence';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
393 $acc = $slice->name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
394 $version = $acc;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
395 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
396
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
397 $acc = $slice->name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
398
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
399
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
400
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
401 #line breaks are allowed near the end of the line on ' ', "\t", "\n", ','
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
402 $: = (" \t\n-,");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
403
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
404 #############
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
405 # dump header
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
406 #############
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
407
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
408 my $EMBL_HEADER =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
409 '@< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
410 ';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
411
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
412 #ID and moltype
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
413 # HTG = High Throughput Genome division, probably most suitable
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
414 # and it would be hard to come up with another appropriate division
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
415 # that worked for all organisms (e.g. plants are in PLN but human is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
416 # in HUM).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
417 my $VALUE = "$entry_name standard; DNA; HTG; $len BP.";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
418 $self->write($FH, $EMBL_HEADER, 'ID', $VALUE);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
419 $self->print( $FH, "XX\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
420
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
421 #Accession
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
422 $self->write($FH, $EMBL_HEADER, 'AC', $acc);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
423 $self->print( $FH, "XX\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
424
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
425 #Version
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
426 $self->write($FH, $EMBL_HEADER, 'SV', $version);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
427 $self->print( $FH, "XX\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
428
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
429 #Date
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
430 $self->write($FH, $EMBL_HEADER, 'DT', $self->_date_string);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
431 $self->print( $FH, "XX\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
432
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
433 my $meta_container = $slice->adaptor()->db()->get_MetaContainer();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
434
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
435 #Description
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
436 $self->write($FH, $EMBL_HEADER, 'DE', $meta_container->get_scientific_name() .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
437 " $name_str $start..$end annotated by Ensembl");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
438 $self->print( $FH, "XX\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
439
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
440 #key words
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
441 $self->write($FH, $EMBL_HEADER, 'KW', '.');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
442 $self->print( $FH, "XX\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
443
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
444 #Species
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
445 my $species_name = $meta_container->get_scientific_name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
446 if(my $cn = $meta_container->get_common_name()) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
447 $species_name .= " ($cn)";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
448 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
449
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
450 $self->write($FH, $EMBL_HEADER, 'OS', $species_name);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
451
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
452 #Classification
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
453 my $cls = $meta_container->get_classification();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
454 $self->write($FH, $EMBL_HEADER, 'OC', join('; ', reverse(@{$cls})) . '.');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
455 $self->print( $FH, "XX\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
456
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
457 #References (we are not dumping refereneces)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
458
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
459 #Database References (we are not dumping these)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
460
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
461 #comments
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
462 foreach my $comment (@COMMENTS) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
463 $self->write($FH, $EMBL_HEADER, 'CC', $comment);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
464 $self->print( $FH, "XX\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
465 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
466
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
467 ####################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
468 #DUMP FEATURE TABLE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
469 ####################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
470 $self->print( $FH, "FH Key Location/Qualifiers\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
471
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
472 my $FEATURE_TABLE =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
473 'FT ^<<<<<<<<<<<<<<<^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
474 ';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
475 $self->_dump_feature_table($slice, $FH, $FEATURE_TABLE);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
476
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
477 #write an XX after the feature tables
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
478 $self->print( $FH, "XX\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
479
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
480 ###################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
481 #DUMP SEQUENCE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
482 ###################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
483
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
484 if(!defined($SEQ)){
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
485 $SEQ = $slice->seq();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
486 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
487 # my $SEQ = $slice->seq();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
488 my $length = length($SEQ);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
489 my $a_count = $SEQ =~ tr/aA/aA/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
490 my $c_count = $SEQ =~ tr/cC/cC/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
491 my $t_count = $SEQ =~ tr/tT/tT/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
492 my $g_count = $SEQ =~ tr/gG/gG/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
493 my $other_count = $length - $a_count - $c_count - $t_count - $g_count;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
494
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
495 my $value = "Sequence $length BP; $a_count A; $c_count C; " .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
496 "$g_count G; $t_count T; $other_count other;";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
497 $self->write($FH, $EMBL_HEADER, 'SQ', $value);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
498
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
499 $self->write_embl_seq($FH, \$SEQ);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
500
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
501
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
502 $self->print( $FH, "//\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
503
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
504 # Set formatting back to normal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
505 $: = " \n-";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
506 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
507
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
508
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
509
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
510
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
511 =head2 dump_genbank
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
512
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
513 Arg [1] : Bio::EnsEMBL::Slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
514 Arg [2] : IO::File $FH
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
515 Example : $seq_dumper->dump_genbank($slice, $FH);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
516 Description: Dumps a GENBANK flat file to an open file handle
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
517 Returntype : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
518 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
519 Caller : dump
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
520
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
521 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
522
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
523 sub dump_genbank {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
524 my ($self, $slice, $FH, $SEQ) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
525
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
526 #line breaks are allowed near the end of the line on ' ', "\t", "\n", ','
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
527 $: = " \t\n-,";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
528
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
529 my $GENBANK_HEADER =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
530 '^<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
531 ';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
532
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
533 my $GENBANK_SUBHEADER =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
534 ' ^<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
535 ';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
536
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
537 my $GENBANK_FT =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
538 ' ^<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
539 ';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
540
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
541 my $version;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
542 my $acc;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
543
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
544 my $cs = $slice->coord_system();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
545
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
546 my $name_str = $cs->name() . ' ' . $slice->seq_region_name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
547
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
548 $name_str .= ' ' . $cs->version if($cs->version);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
549
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
550 #determine if this slice is the entire seq region
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
551 #if it is then we just use the name as the id
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
552 my $slice_adaptor = $slice->adaptor();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
553 my $full_slice =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
554 $slice->adaptor->fetch_by_region($cs->name,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
555 $slice->seq_region_name,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
556 undef,undef,undef,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
557 $cs->version);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
558
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
559
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
560 my $entry_name = $slice->seq_region_name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
561
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
562 if($full_slice->name eq $slice->name) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
563 $name_str .= ' full sequence';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
564 $acc = $slice->seq_region_name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
565 my @acc_ver = split(/\./, $acc);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
566 if(@acc_ver == 2) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
567 $acc = $acc_ver[0];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
568 $version = $acc_ver[0] . $acc_ver[1];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
569 } elsif(@acc_ver == 1 && $cs->version()) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
570 $version = $acc . $cs->version();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
571 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
572 $version = $acc;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
573 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
574 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
575 $name_str .= ' partial sequence';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
576 $acc = $slice->name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
577 $version = $acc;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
578 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
579
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
580 $acc = $slice->name(); # to keep format consistent for all
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
581
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
582 my $length = $slice->length;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
583 my $start = $slice->start();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
584 my $end = $slice->end();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
585
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
586 my $date = $self->_date_string;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
587
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
588 my $meta_container = $slice->adaptor()->db()->get_MetaContainer();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
589
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
590 #LOCUS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
591 my $tag = 'LOCUS';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
592 my $value = "$entry_name $length bp DNA HTG $date";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
593 $self->write($FH, $GENBANK_HEADER, $tag, $value);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
594
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
595 #DEFINITION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
596 $tag = "DEFINITION";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
597 $value = $meta_container->get_scientific_name() .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
598 " $name_str $start..$end reannotated via EnsEMBL";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
599 $self->write($FH, $GENBANK_HEADER, $tag, $value);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
600
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
601 #ACCESSION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
602 $self->write($FH, $GENBANK_HEADER, 'ACCESSION', $acc);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
603
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
604 #VERSION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
605 $self->write($FH, $GENBANK_HEADER, 'VERSION', $version);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
606
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
607 # KEYWORDS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
608 $self->write($FH, $GENBANK_HEADER, 'KEYWORDS', '.');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
609
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
610 # SOURCE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
611 my $common_name = $meta_container->get_common_name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
612 $common_name = $meta_container->get_scientific_name() unless $common_name;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
613 $self->write($FH, $GENBANK_HEADER, 'SOURCE', $common_name);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
614
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
615 #organism
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
616 my @cls = $meta_container->get_classification();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
617 shift @cls;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
618 $self->write($FH, $GENBANK_SUBHEADER, 'ORGANISM', $meta_container->get_scientific_name());
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
619 $self->write($FH, $GENBANK_SUBHEADER, '', join('; ', reverse @cls) . ".");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
620
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
621 #refereneces
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
622
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
623 #comments
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
624 foreach my $comment (@COMMENTS) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
625 $self->write($FH, $GENBANK_HEADER, 'COMMENT', $comment);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
626 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
627
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
628 ####################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
629 # DUMP FEATURE TABLE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
630 ####################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
631 $self->print( $FH, "FEATURES Location/Qualifiers\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
632 $self->_dump_feature_table($slice, $FH, $GENBANK_FT);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
633
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
634 ####################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
635 # DUMP SEQUENCE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
636 ####################
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
637
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
638 if(!defined($SEQ)){
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
639 $SEQ = $slice->seq();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
640 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
641 # my $SEQ = $slice->seq();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
642 my $a_count = $SEQ =~ tr/aA/aA/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
643 my $c_count = $SEQ =~ tr/cC/cC/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
644 my $t_count = $SEQ =~ tr/tT/tT/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
645 my $g_count = $SEQ =~ tr/gG/gG/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
646 my $bp_length = length($SEQ);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
647 my $other_count = $bp_length - $a_count - $c_count - $t_count - $g_count;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
648
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
649 $tag = 'BASE COUNT';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
650 $value = "$a_count a $c_count c $g_count g $t_count t";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
651 $value .= " $other_count n" if($other_count);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
652 $self->write($FH, $GENBANK_HEADER, $tag, $value);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
653 $self->print( $FH, "ORIGIN\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
654
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
655 $self->write_genbank_seq($FH, \$SEQ);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
656
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
657 $self->print( $FH, "//\n" );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
658
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
659 # Set formatting back to normal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
660 $: = " \n-";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
661 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
662
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
663
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
664
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
665 =head2 _dump_feature_table
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
666
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
667 Arg [1] : Bio::EnsEMBL::Slice slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
668 Example : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
669 Description: Helper method used to dump feature tables used in EMBL, FASTA,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
670 GENBANK. Assumes formating of file handle has been setup
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
671 already to use $FEAT and $VALUE values.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
672 Returntype : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
673 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
674 Caller : internal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
675
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
676 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
677
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
678 sub _dump_feature_table {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
679 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
680 my $slice = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
681 my $FH = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
682 my $FORMAT = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
683
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
684 #use only the core database to dump features (except for bloody snps)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
685 my $lite = $slice->adaptor->db->remove_db_adaptor('lite');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
686
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
687 my $meta = $slice->adaptor->db->get_MetaContainer;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
688
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
689 #lump file handle and format string together for simpler method calls
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
690 my @ff = ($FH, $FORMAT);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
691 my $value;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
692
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
693 #source
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
694 my $classification = join(', ', $meta->get_classification());
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
695 $self->write(@ff,'source', "1.." . $slice->length());
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
696 $self->write(@ff,'' , '/organism="'.$meta->get_scientific_name(). '"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
697 $self->write(@ff,'' , '/db_xref="taxon:'.$meta->get_taxonomy_id().'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
698
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
699 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
700 # Transcripts & Genes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
701 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
702 my @gene_slices;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
703 if($self->is_enabled('gene')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
704 push @gene_slices, $slice;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
705 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
706
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
707 # Retrieve slices of other database where we need to pull genes from
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
708
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
709 my $gene_dbs = {'vegagene' => 'vega',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
710 'estgene' => 'estgene'};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
711
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
712 foreach my $gene_type (keys %$gene_dbs) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
713 if($self->is_enabled($gene_type)) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
714 my $db = $self->get_database($gene_dbs->{$gene_type});
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
715 if($db) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
716 my $sa = $db->get_SliceAdaptor();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
717 push @gene_slices, $sa->fetch_by_name($slice->name());
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
718 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
719 warning("A [". $gene_dbs->{$gene_type} ."] database must be " .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
720 "attached to this SeqDumper\n(via a call to " .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
721 "attach_database) to retrieve genes of type [$gene_type]");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
722 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
723 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
724 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
725
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
726 foreach my $gene_slice (@gene_slices) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
727 my @genes = @{$gene_slice->get_all_Genes(undef,undef, 1)};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
728 while(my $gene = shift @genes) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
729 $value = $self->features2location( [$gene] );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
730 $self->write( @ff, 'gene', $value );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
731 $self->write( @ff, "", '/gene='.$gene->stable_id() );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
732
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
733
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
734 if(defined($gene->display_xref)){
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
735 $self->write( @ff, "",'/locus_tag="'.$gene->display_xref->display_id.'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
736 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
737 my $desc = $gene->description;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
738 if(defined($desc) and $desc ne ""){
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
739 $desc =~ s/\"//;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
740 $self->write( @ff, "", '/note="'.$gene->description.'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
741 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
742
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
743
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
744
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
745 foreach my $transcript (@{$gene->get_all_Transcripts}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
746 my $translation = $transcript->translation;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
747
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
748 # normal transcripts get dumped differently than pseudogenes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
749 if($translation) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
750 #normal transcript
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
751 $value = $self->features2location($transcript->get_all_Exons);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
752 $self->write(@ff, 'mRNA', $value);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
753 $self->write(@ff,'' , '/gene="'.$gene->stable_id().'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
754 $self->write(@ff,''
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
755 ,'/note="transcript_id='.$transcript->stable_id().'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
756
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
757 # ...and a CDS section
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
758 $value =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
759 $self->features2location($transcript->get_all_translateable_Exons);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
760 $self->write(@ff,'CDS', $value);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
761 my $codon_start = $self->transcript_to_codon_start($transcript);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
762 $self->write(@ff,'' , qq{/codon_start="${codon_start}"}) if $codon_start > 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
763 $self->write(@ff,'' , '/gene="'.$gene->stable_id().'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
764 $self->write(@ff,'' , '/protein_id="'.$translation->stable_id().'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
765 $self->write(@ff,'' ,'/note="transcript_id='.$transcript->stable_id().'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
766
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
767 foreach my $dbl (@{$transcript->get_all_DBLinks}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
768 $value = '/db_xref="'.$dbl->dbname().':'.$dbl->display_id().'"';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
769 $self->write(@ff, '', $value);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
770 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
771
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
772 $value = '/translation="'.$transcript->translate()->seq().'"';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
773 $self->write(@ff, '', $value);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
774 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
775 #pseudogene
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
776 $value = $self->features2location($transcript->get_all_Exons);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
777 $self->write(@ff, 'misc_RNA', $value);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
778 $self->write(@ff,'' , '/gene="'.$gene->stable_id().'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
779 foreach my $dbl (@{$transcript->get_all_DBLinks}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
780 $value = '/db_xref="'.$dbl->dbname().':'.$dbl->primary_id().'"';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
781 $self->write(@ff, '', $value);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
782 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
783 $self->write(@ff,'' , '/note="'.$transcript->biotype().'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
784 $self->write(@ff,''
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
785 ,'/note="transcript_id='.$transcript->stable_id().'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
786 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
787 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
788 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
789
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
790 # exons
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
791 foreach my $gene (@{$gene_slice->get_all_Genes(undef,undef,1)}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
792 foreach my $exon (@{$gene->get_all_Exons}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
793 $self->write(@ff,'exon', $self->features2location([$exon]));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
794 $self->write(@ff,'' , '/note="exon_id='.$exon->stable_id().'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
795 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
796 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
797 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
798
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
799 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
800 # genscans
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
801 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
802 if($self->is_enabled('genscan')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
803 my @genscan_exons;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
804 my @transcripts = @{$slice->get_all_PredictionTranscripts(undef,1)};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
805 while(my $transcript = shift @transcripts) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
806 my $exons = $transcript->get_all_Exons();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
807 push @genscan_exons, @$exons;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
808 $self->write(@ff, 'mRNA', $self->features2location($exons));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
809 $self->write(@ff, '', '/product="'.$transcript->translate()->seq().'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
810 $self->write(@ff, '', '/note="identifier='.$transcript->stable_id.'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
811 $self->write(@ff, '', '/note="Derived by automated computational' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
812 ' analysis using gene prediction method:' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
813 $transcript->analysis->logic_name . '"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
814 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
815 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
816
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
817 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
818 # snps
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
819 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
820 if($self->is_enabled('variation') && $slice->can('get_all_VariationFeatures')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
821 # $slice->adaptor->db->add_db_adaptor('lite', $lite) if $lite;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
822
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
823 my @variations = @{$slice->get_all_VariationFeatures()};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
824 while(my $snp = shift @variations) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
825 my $ss = $snp->start;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
826 my $se = $snp->end;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
827 #skip snps that hang off edge of slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
828 next if($ss < 1 || $se > $slice->length);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
829
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
830 $self->write(@ff, 'variation', "$ss..$se");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
831 $self->write(@ff, '' , '/replace="'.$snp->allele_string.'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
832 #$self->write(@ff, '' , '/evidence="'.$snp->status.'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
833 my $rs_id = $snp->variation_name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
834 my $db = $snp->source();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
835 # foreach my $link ($snp->each_DBLink) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
836 # my $id = $link->primary_id;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
837 # my $db = $link->database;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
838 $self->write(@ff, '', "/db_xref=\"$db:$rs_id\"");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
839 # }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
840 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
841
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
842 # $slice->adaptor->db->remove_db_adaptor('lite') if $lite;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
843 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
844
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
845 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
846 # similarity features
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
847 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
848 if($self->is_enabled('similarity')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
849 foreach my $sim (@{$slice->get_all_SimilarityFeatures}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
850 $self->write(@ff, 'misc_feature', $self->features2location([$sim]));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
851 $self->write(@ff, '' , '/note="match: '.$sim->hseqname.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
852 ' : '.$sim->hstart.'..'.$sim->hend.'('.$sim->hstrand.')"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
853 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
854 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
855
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
856 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
857 # repeats
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
858 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
859 if($self->is_enabled('repeat')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
860 my @rfs = @{$slice->get_all_RepeatFeatures()};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
861
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
862 while(my $repeat = shift @rfs) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
863 $self->write(@ff, 'repeat_region', $self->features2location([$repeat]));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
864 $self->write(@ff, '' , '/note="' . $repeat->repeat_consensus->name.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
865 ' repeat: matches ' . $repeat->hstart.'..'.$repeat->hend .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
866 '('.$repeat->hstrand.') of consensus"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
867 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
868
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
869 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
870
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
871 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
872 # markers
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
873 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
874 if($self->is_enabled('marker') && $slice->can('get_all_MarkerFeatures')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
875 my @markers = @{$slice->get_all_MarkerFeatures()};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
876 while(my $mf = shift @markers) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
877 $self->write(@ff, 'STS', $self->features2location([$mf]));
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
878 if($mf->marker->display_MarkerSynonym) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
879 $self->write(@ff, '' , '/standard_name="' .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
880 $mf->marker->display_MarkerSynonym->name . '"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
881 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
882
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
883
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
884 #grep out synonyms without a source
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
885 my @synonyms = @{$mf->marker->get_all_MarkerSynonyms};
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
886 @synonyms = grep {$_->source } @synonyms;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
887 foreach my $synonym (@synonyms) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
888 $self->write(@ff, '', '/db_xref="'.$synonym->source.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
889 ':'.$synonym->name.'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
890 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
891 $self->write(@ff, '', '/note="map_weight='.$mf->map_weight.'"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
892 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
893 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
894
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
895 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
896 # contigs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
897 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
898 if($self->is_enabled('contig')) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
899 foreach my $segment (@{$slice->project('seqlevel')}) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
900 my ($start, $end, $slice) = @$segment;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
901 $self->write(@ff, 'misc_feature',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
902 $start .'..'. $end);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
903 $self->write(@ff, '', '/note="contig '.$slice->seq_region_name .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
904 ' ' . $slice->start . '..' . $slice->end .
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
905 '(' . $slice->strand . ')"');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
906 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
907 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
908
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
909 $slice->adaptor->db->add_db_adaptor('lite', $lite) if $lite;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
910
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
911 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
912
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
913 # /codon_start= is the first base to start translating from. This maps
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
914 # Ensembl start exon phase to this concept. Here we present the usage
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
915 # of phase in this concept where each row shows the sequence the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
916 # spliced_seq() method will return
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
917
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
918 # 123456789
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
919 # ATTATGACG
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
920 # Phase == 0 ...+++### codon_start=0 // start from 1st A
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
921 # Phase == 1 ..+++### codon_start=3 // start from 2nd A (base 3 in the given spliced sequence)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
922 # Phase == 2 .+++### codon_start=2 // start from 2nd A (base 2 in the spliced sequence)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
923 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
924 # In the case of the final 2 phases we will generate a X codon
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
925 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
926
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
927 sub transcript_to_codon_start {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
928 my ($self, $transcript) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
929 my $start_phase = $transcript->start_Exon()->phase();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
930 return ( $start_phase == 1 ) ? 3 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
931 ( $start_phase == 2 ) ? 2 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
932 ( $start_phase == 0 ) ? 1 :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
933 -1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
934 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
935
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
936
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
937 =head2 dump_fasta
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
938
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
939 Arg [1] : Bio::EnsEMBL::Slice
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
940 Arg [2] : IO::File $FH
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
941 Example : $seq_dumper->dump_fasta($slice, $FH);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
942 Description: Dumps an FASTA flat file to an open file handle
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
943 Returntype : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
944 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
945 Caller : dump
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
946
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
947 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
948
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
949 sub dump_fasta {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
950 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
951 my $slice = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
952 my $FH = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
953
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
954 my $id = $slice->seq_region_name;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
955 my $seqtype = 'dna';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
956 my $idtype = $slice->coord_system->name;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
957 my $location = $slice->name;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
958 my $start = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
959 my $end = $slice->length();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
960
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
961 my $header = ">$id $seqtype:$idtype $location\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
962 $self->print( $FH, $header );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
963
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
964 #set the formatting to FASTA
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
965 my $FORMAT = '^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
966 ';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
967
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
968 #chunk the sequence in 60kb chunks to use less memory
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
969 my $cur = $start;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
970 while($cur <= $end) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
971 my $to = $cur + 59_999;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
972 $to = $end if($to > $end);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
973 my $seq = $slice->subseq($cur, $to);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
974 $cur = $to + 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
975 $self->write($FH, $FORMAT, $seq);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
976 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
977 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
978
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
979
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
980
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
981 =head2 features2location
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
982
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
983 Arg [1] : listref of Bio::EnsEMBL::SeqFeatures
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
984 Example : $location = $self->features2location(\@features);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
985 Description: Constructs an EMBL location string from a list of features
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
986 Returntype : string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
987 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
988 Caller : internal
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
989
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
990 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
991
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
992 sub features2location {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
993 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
994 my $features = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
995
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
996 my @join = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
997
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
998 foreach my $f (@$features) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
999 my $slice = $f->slice;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1000 my $start = $f->start();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1001 my $end = $f->end();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1002 my $strand = $f->strand();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1003
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1004 if($start >= 1 && $end <= $slice->length) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1005 #this feature in on a slice and doesn't lie outside the boundary
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1006
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1007 if($strand == 1) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1008 push @join, "$start..$end";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1009 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1010 push @join, "complement($start..$end)";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1011 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1012 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1013 my @fs = ();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1014 #this feature is outside the boundary of the dump,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1015 # yet implemented and 'seqlevel' is guaranteed to be 1step
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1016 my $projection = $f->project('seqlevel');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1017 foreach my $segment (@$projection) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1018 my $slice = $segment->[2];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1019 my $slc_start = $slice->start();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1020 my $slc_end = $slice->end();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1021 my $seq_reg = $slice->seq_region_name();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1022 if($slice->strand == 1) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1023 push @join, "$seq_reg:$slc_start..$slc_end";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1024 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1025 push @join, "complement($seq_reg:$slc_start..$slc_end)";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1026 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1027 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1028 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1029 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1030
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1031 my $out = join ',', @join;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1032
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1033 if(scalar @join > 1) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1034 $out = "join($out)";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1035 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1036
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1037 return $out;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1038 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1039
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1040
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1041 sub _date_string {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1042 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1043
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1044 my ($sec, $min, $hour, $mday,$mon, $year) = localtime(time());
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1045
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1046 my $month = ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL',
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1047 'AUG', 'SEP', 'OCT', 'NOV', 'DEC')[$mon];
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1048 $year += 1900;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1049
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1050 return "$mday-$month-$year";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1051 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1052
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1053
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1054 sub write {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1055 my ($self, $FH, $FORMAT, @values) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1056
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1057 #while the last value still contains something
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1058 while(defined($values[-1]) and $values[-1] ne '') {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1059 formline($FORMAT, @values);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1060 $self->print( $FH, $^A );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1061 $^A = '';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1062 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1063 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1064
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1065 sub write_genbank_seq {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1066 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1067 my $FH = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1068 my $seq = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1069 my $base_total = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1070
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1071 $base_total ||= 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1072
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1073 my $GENBANK_SEQ =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1074 '@>>>>>>>> ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<<~
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1075 ';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1076
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1077 my $total = -59 + $base_total;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1078 #keep track of total and print lines of 60 bases with spaces every 10bp
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1079 while($$seq) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1080 $total += 60;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1081 formline($GENBANK_SEQ,$total, $$seq, $$seq, $$seq, $$seq, $$seq, $$seq);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1082 $self->print( $FH, $^A );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1083 $^A = '';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1084 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1085 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1086
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1087 sub write_embl_seq {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1088 my $self = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1089 my $FH = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1090 my $seq = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1091 my $base_total = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1092
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1093 $base_total ||= 0;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1094
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1095 my $EMBL_SEQ =
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1096 ' ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<<@>>>>>>>>>~
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1097 ';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1098 #keep track of total and print lines of 60 bases with spaces every 10bp
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1099 my $length = length($$seq);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1100 my $total = $length - $base_total;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1101 while($$seq) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1102 $total -= 60;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1103 $total = 0 if($total < 0);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1104 formline($EMBL_SEQ,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1105 $$seq, $$seq, $$seq, $$seq, $$seq, $$seq,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1106 $length - $total);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1107 $self->print( $FH, $^A );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1108 $^A = '';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1109 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1110 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1111
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1112 sub print {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1113 my( $self, $FH, $string ) = @_;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1114 if(!print $FH $string){
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1115 print STDERR "Problem writing to disk\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1116 print STDERR "the string is $string\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1117 die "Could not write to file handle";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1118 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1119 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1120
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1121 1;