annotate variant_effect_predictor/Bio/EnsEMBL/Utils/SeqDumper.pm @ 2:a5976b2dce6f

changing defualt values for ensembl database
author mahtabm
date Thu, 11 Apr 2013 17:15:42 +1000
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::Utils::SeqDumper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 $seq_dumper = Bio::EnsEMBL::Utils::SeqDumper->new();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 # don't dump snps or repeats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 $seq_dumper->disable_feature_type('repeat');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 $seq_dumper->disable_feature_type('variation');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 # dump EMBL format to STDOUT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 $seq_dumper->dump( $slice, 'EMBL' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 # dump GENBANK format to a file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 $seq_dumper->dump( $slice, 'GENBANK', 'out.genbank' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 # dump FASTA format to a file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 $seq_dumper->dump( $slice, 'FASTA', 'out.fasta' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 A relatively simple and lite-weight flat file dumper for Ensembl slices.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 The memory efficiency could be improved and this is currently not very
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 good for dumping very large sequences such as whole chromosomes.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 package Bio::EnsEMBL::Utils::SeqDumper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 use IO::File;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 #keys must be uppercase
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 my $DUMP_HANDLERS =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 { 'FASTA' => \&dump_fasta,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 'EMBL' => \&dump_embl,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 'GENBANK' => \&dump_genbank };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 my @COMMENTS =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 ('This sequence was annotated by the Ensembl system. Please visit ' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 'the Ensembl web site, http://www.ensembl.org/ for more information.',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 'All feature locations are relative to the first (5\') base ' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 'of the sequence in this file. The sequence presented is '.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 'always the forward strand of the assembly. Features ' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 'that lie outside of the sequence contained in this file ' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 'have clonal location coordinates in the format: ' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 '<clone accession>.<version>:<start>..<end>',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 'The /gene indicates a unique id for a gene, /note="transcript_id=..."' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 ' a unique id for a transcript, /protein_id a unique id for a peptide ' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 'and note="exon_id=..." a unique id for an exon. These ids are ' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 'maintained wherever possible between versions.',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 'All the exons and transcripts in Ensembl are confirmed by ' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 'similarity to either protein or cDNA sequences.');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 Arg [1] : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 Example : $seq_dumper = Bio::EnsEMBL::Utils::SeqDumper->new;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 Description: Creates a new SeqDumper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 Returntype : Bio::EnsEMBL::Utils::SeqDumper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 sub new {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 my ($caller, $slice, $params) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 my $class = ref($caller) || $caller;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 my $feature_types = {'gene' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 'genscan' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 'repeat' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 'similarity' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 'variation' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 'contig' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 'marker' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 'estgene' => 0,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 'vegagene' => 0};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 my $self = bless {'feature_types' => $feature_types}, $class;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 foreach my $p (sort keys %{$params || {}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 $self->{$p} = $params->{$p};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 return $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 =head2 enable_feature_type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 Arg [1] : string $type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 Example : $seq_dumper->enable_feature_type('similarity');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 Description: Enables the dumping of a specific type of feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 Exceptions : warn if invalid feature type is passed,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 thrown if no feature type is passed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 sub enable_feature_type {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 my ($self, $type) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 $type || throw("type arg is required");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 if(exists($self->{'feature_types'}->{$type})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 $self->{'feature_types'}->{$type} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 warning("unknown feature type '$type'\n" .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 "valid types are: " . join(',', keys %{$self->{'feature_types'}}));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 =head2 attach_database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 Arg [1] : string name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 Arg [2] : Bio::EnsEMBL::DBSQL::DBAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 Example : $seq_dumper->attach_database('estgene', $estgene_db);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 Description: Attaches a database to the seqdumper that can be used to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 dump data which is external to the ensembl core database.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 Currently this is necessary to dump est genes and vega genes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 Exceptions : thrown if incorrect argument is supplied
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 sub attach_database {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 my ($self, $name, $db) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 $name || throw("name arg is required");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 unless($db && ref($db) && $db->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 throw("db arg must be a Bio::EnsEMBL::DBSQL::DBAdaptor not a [$db]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 $self->{'attached_dbs'}->{$name} = $db;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 =head2 get_database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 Arg [1] : string $name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 Example : $db = $seq_dumper->get_database('vega');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 Description: Retrieves a database that has been attached to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 seqdumper via the attach database call.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 Exceptions : thrown if incorrect argument is supplied
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 Caller : dump_feature_table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 sub get_database {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 my ($self, $name) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 $name || throw("name arg is required");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 return $self->{'attached_dbs'}->{$name};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 =head2 remove_database
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 Arg [1] : string $name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 Example : $db = $seq_dumper->remove_database('estgene');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 Description: Removes a database that has been attached to the seqdumper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 via the attach database call. The database that is removed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 is returned (or undef if it did not exist).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 Exceptions : thrown if incorrect argument is supplied
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 sub remove_database {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 my ($self, $name) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 $name || throw("name arg is required");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 if(exists $self->{'attached_dbs'}->{$name}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 return delete $self->{'attached_dbs'}->{$name};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 =head2 disable_feature_type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 Arg [1] : string $type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 Example : $seq_dumper->disable_feature_type('genes');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 Description: Disables the dumping of a specific type of feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 Exceptions : warn if an invalid feature type is passed,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 thrown if no feature type is passed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 sub disable_feature_type {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 my ($self, $type) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 $type || throw("type arg is required");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 if(exists($self->{'feature_types'}->{$type})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 $self->{'feature_types'}->{$type} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 warning("unknown feature type '$type'\n" .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 "valid types are: " . join(',', keys %{$self->{'feature_types'}}));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 =head2 is_enabled
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 Arg [1] : string $type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 Example : do_something() if($seq_dumper->is_enabled('gene'));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 Description: checks if a specific feature type is enabled
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 Exceptions : warning if invalid type is passed,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 thrown if no type is passed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 sub is_enabled {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 my ($self, $type) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 $type || throw("type arg is required");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 if(exists($self->{'feature_types'}->{$type})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 return $self->{'feature_types'}->{$type};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 warning("unknown feature type '$type'\n" .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 "valid types are: " . join(',', keys %{$self->{'feature_types'}}));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 =head2 dump
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 Arg [1] : Bio::EnsEMBL::Slice slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 The slice to dump
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 Arg [2] : string $format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 The name of the format to dump
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 Arg [3] : (optional) $outfile
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 The name of the file to dump to. If no file is specified STDOUT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 is used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 Arg [4] : (optional) $seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 Sequence to dump
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 Arg [4] : (optional) $no_append
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 Default action is to open the file in append mode. This will
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 turn that mode off
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 Example : $seq_dumper->dump($slice, 'EMBL');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 Description: Dumps a region of a genome specified by the slice argument into
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 an outfile of the format $format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 Exceptions : thrown if slice or format args are not supplied
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 sub dump {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 my ($self, $slice, $format, $outfile, $seq, $no_append) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 $format || throw("format arg is required");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 $slice || throw("slice arg is required");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 my $dump_handler = $DUMP_HANDLERS->{uc($format)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 unless($dump_handler) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 throw("No dump handler is defined for format $format\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 my $FH = IO::File->new;;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 if($outfile) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 my $mode = ($no_append) ? '>' : '>>';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 $FH->open("${mode}${outfile}") or throw("Could not open file $outfile: $!");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 $FH = \*STDOUT;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 #mod_perl did not like the following
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 #$FH->fdopen(fileno(STDOUT), "w")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 # or throw("Could not open currently selected output filehandle " .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 # "for writing");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 &$dump_handler($self, $slice, $FH, $seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 $FH->close if ($outfile); #close if we were writing to a file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 =head2 dump_embl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 Arg [1] : Bio::EnsEMBL::Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 Arg [2] : IO::File $FH
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 Arg [3] : optional sequence string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 Example : $seq_dumper->dump_embl($slice, $FH);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 Description: Dumps an EMBL flat file to an open file handle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 Caller : dump
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 sub dump_embl {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 my $slice = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 my $FH = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 my $SEQ = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 my $len = $slice->length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 my $version;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 my $acc;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 my $cs = $slice->coord_system();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 my $name_str = $cs->name() . ' ' . $slice->seq_region_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 $name_str .= ' ' . $cs->version if($cs->version);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 my $start = $slice->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 my $end = $slice->end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 #determine if this slice is the entire seq region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 #if it is then we just use the name as the id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 my $slice_adaptor = $slice->adaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 my $full_slice =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 $slice->adaptor->fetch_by_region($cs->name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 $slice->seq_region_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 undef,undef,undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 $cs->version);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 my $entry_name = $slice->seq_region_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 if($full_slice->name eq $slice->name) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 $name_str .= ' full sequence';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 $acc = $slice->seq_region_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 my @acc_ver = split(/\./, $acc);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 if(@acc_ver == 2) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 $acc = $acc_ver[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 $version = $acc_ver[0] . '.'. $acc_ver[1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 } elsif(@acc_ver == 1 && $cs->version()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 $version = $acc . '.'. $cs->version();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 $version = $acc;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 $name_str .= ' partial sequence';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 $acc = $slice->name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 $version = $acc;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 $acc = $slice->name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 #line breaks are allowed near the end of the line on ' ', "\t", "\n", ','
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 $: = (" \t\n-,");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 #############
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 # dump header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 #############
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 my $EMBL_HEADER =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 '@< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 #ID and moltype
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 # HTG = High Throughput Genome division, probably most suitable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 # and it would be hard to come up with another appropriate division
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 # that worked for all organisms (e.g. plants are in PLN but human is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 # in HUM).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 my $VALUE = "$entry_name standard; DNA; HTG; $len BP.";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 $self->write($FH, $EMBL_HEADER, 'ID', $VALUE);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 $self->print( $FH, "XX\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 #Accession
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 $self->write($FH, $EMBL_HEADER, 'AC', $acc);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 $self->print( $FH, "XX\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 #Version
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 $self->write($FH, $EMBL_HEADER, 'SV', $version);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 $self->print( $FH, "XX\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 #Date
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 $self->write($FH, $EMBL_HEADER, 'DT', $self->_date_string);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 $self->print( $FH, "XX\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 my $meta_container = $slice->adaptor()->db()->get_MetaContainer();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 #Description
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 $self->write($FH, $EMBL_HEADER, 'DE', $meta_container->get_scientific_name() .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 " $name_str $start..$end annotated by Ensembl");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 $self->print( $FH, "XX\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 #key words
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 $self->write($FH, $EMBL_HEADER, 'KW', '.');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 $self->print( $FH, "XX\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 #Species
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 my $species_name = $meta_container->get_scientific_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 if(my $cn = $meta_container->get_common_name()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 $species_name .= " ($cn)";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 $self->write($FH, $EMBL_HEADER, 'OS', $species_name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 #Classification
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 my $cls = $meta_container->get_classification();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 $self->write($FH, $EMBL_HEADER, 'OC', join('; ', reverse(@{$cls})) . '.');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 $self->print( $FH, "XX\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 #References (we are not dumping refereneces)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 #Database References (we are not dumping these)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 #comments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 foreach my $comment (@COMMENTS) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 $self->write($FH, $EMBL_HEADER, 'CC', $comment);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 $self->print( $FH, "XX\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 ####################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 #DUMP FEATURE TABLE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 ####################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 $self->print( $FH, "FH Key Location/Qualifiers\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 my $FEATURE_TABLE =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 'FT ^<<<<<<<<<<<<<<<^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 $self->_dump_feature_table($slice, $FH, $FEATURE_TABLE);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 #write an XX after the feature tables
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 $self->print( $FH, "XX\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 ###################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 #DUMP SEQUENCE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 ###################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 if(!defined($SEQ)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 $SEQ = $slice->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 # my $SEQ = $slice->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 my $length = length($SEQ);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 my $a_count = $SEQ =~ tr/aA/aA/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 my $c_count = $SEQ =~ tr/cC/cC/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 my $t_count = $SEQ =~ tr/tT/tT/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 my $g_count = $SEQ =~ tr/gG/gG/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 my $other_count = $length - $a_count - $c_count - $t_count - $g_count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 my $value = "Sequence $length BP; $a_count A; $c_count C; " .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 "$g_count G; $t_count T; $other_count other;";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497 $self->print($FH, 'SQ '.$value."\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 $self->write_embl_seq($FH, \$SEQ);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 $self->print( $FH, "//\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504 # Set formatting back to normal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 $: = " \n-";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 =head2 dump_genbank
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 Arg [1] : Bio::EnsEMBL::Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 Arg [2] : IO::File $FH
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 Example : $seq_dumper->dump_genbank($slice, $FH);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 Description: Dumps a GENBANK flat file to an open file handle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 Caller : dump
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 sub dump_genbank {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 my ($self, $slice, $FH, $SEQ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 #line breaks are allowed near the end of the line on ' ', "\t", "\n", ','
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 $: = " \t\n-,";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 my $GENBANK_HEADER =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 '^<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 my $GENBANK_SUBHEADER =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 ' ^<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 my $GENBANK_FT =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 ' ^<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<~~
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539 ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 my $version;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 my $acc;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 my $cs = $slice->coord_system();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 my $name_str = $cs->name() . ' ' . $slice->seq_region_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 $name_str .= ' ' . $cs->version if($cs->version);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 #determine if this slice is the entire seq region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 #if it is then we just use the name as the id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 my $slice_adaptor = $slice->adaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 my $full_slice =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 $slice->adaptor->fetch_by_region($cs->name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 $slice->seq_region_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 undef,undef,undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 $cs->version);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560 my $entry_name = $slice->seq_region_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 if($full_slice->name eq $slice->name) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 $name_str .= ' full sequence';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 $acc = $slice->seq_region_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 my @acc_ver = split(/\./, $acc);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566 if(@acc_ver == 2) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 $acc = $acc_ver[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568 $version = $acc_ver[0] . $acc_ver[1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569 } elsif(@acc_ver == 1 && $cs->version()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 $version = $acc . $cs->version();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572 $version = $acc;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 $name_str .= ' partial sequence';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 $acc = $slice->name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 $version = $acc;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 $acc = $slice->name(); # to keep format consistent for all
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582 my $length = $slice->length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 my $start = $slice->start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 my $end = $slice->end();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 my $date = $self->_date_string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588 my $meta_container = $slice->adaptor()->db()->get_MetaContainer();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 #LOCUS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591 my $tag = 'LOCUS';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592 my $value = "$entry_name $length bp DNA HTG $date";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 $self->write($FH, $GENBANK_HEADER, $tag, $value);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 #DEFINITION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 $tag = "DEFINITION";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 $value = $meta_container->get_scientific_name() .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598 " $name_str $start..$end reannotated via EnsEMBL";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 $self->write($FH, $GENBANK_HEADER, $tag, $value);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601 #ACCESSION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 $self->write($FH, $GENBANK_HEADER, 'ACCESSION', $acc);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 #VERSION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605 $self->write($FH, $GENBANK_HEADER, 'VERSION', $version);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607 # KEYWORDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608 $self->write($FH, $GENBANK_HEADER, 'KEYWORDS', '.');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610 # SOURCE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611 my $common_name = $meta_container->get_common_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612 $common_name = $meta_container->get_scientific_name() unless $common_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613 $self->write($FH, $GENBANK_HEADER, 'SOURCE', $common_name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615 #organism
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 my @cls = $meta_container->get_classification();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617 shift @cls;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 $self->write($FH, $GENBANK_SUBHEADER, 'ORGANISM', $meta_container->get_scientific_name());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619 $self->write($FH, $GENBANK_SUBHEADER, '', join('; ', reverse @cls) . ".");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621 #refereneces
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 #comments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 foreach my $comment (@COMMENTS) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625 $self->write($FH, $GENBANK_HEADER, 'COMMENT', $comment);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 ####################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629 # DUMP FEATURE TABLE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 ####################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631 $self->print( $FH, "FEATURES Location/Qualifiers\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 $self->_dump_feature_table($slice, $FH, $GENBANK_FT);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634 ####################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 # DUMP SEQUENCE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 ####################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 if(!defined($SEQ)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639 $SEQ = $slice->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 # my $SEQ = $slice->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642 my $a_count = $SEQ =~ tr/aA/aA/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 my $c_count = $SEQ =~ tr/cC/cC/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644 my $t_count = $SEQ =~ tr/tT/tT/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 my $g_count = $SEQ =~ tr/gG/gG/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646 my $bp_length = length($SEQ);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 my $other_count = $bp_length - $a_count - $c_count - $t_count - $g_count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649 $tag = 'BASE COUNT';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650 $value = "$a_count a $c_count c $g_count g $t_count t";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651 $value .= " $other_count n" if($other_count);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 $self->print($FH, qq{$tag $value\n});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653 $self->print( $FH, "ORIGIN\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655 $self->write_genbank_seq($FH, \$SEQ);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657 $self->print( $FH, "//\n" );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 # Set formatting back to normal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660 $: = " \n-";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665 =head2 _dump_feature_table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667 Arg [1] : Bio::EnsEMBL::Slice slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668 Example : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669 Description: Helper method used to dump feature tables used in EMBL, FASTA,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670 GENBANK. Assumes formating of file handle has been setup
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671 already to use $FEAT and $VALUE values.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 sub _dump_feature_table {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 my $slice = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681 my $FH = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 my $FORMAT = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 #use only the core database to dump features (except for bloody snps)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685 my $lite = $slice->adaptor->db->remove_db_adaptor('lite');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687 my $meta = $slice->adaptor->db->get_MetaContainer;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689 #lump file handle and format string together for simpler method calls
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690 my @ff = ($FH, $FORMAT);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 my $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693 #source
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694 my $classification = join(', ', $meta->get_classification());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695 $self->write(@ff,'source', "1.." . $slice->length());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696 $self->write(@ff,'' , '/organism="'.$meta->get_scientific_name(). '"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697 $self->write(@ff,'' , '/db_xref="taxon:'.$meta->get_taxonomy_id().'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700 # Transcripts & Genes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702 my @gene_slices;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 if($self->is_enabled('gene')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704 push @gene_slices, $slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707 # Retrieve slices of other database where we need to pull genes from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709 my $gene_dbs = {'vegagene' => 'vega',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 'estgene' => 'estgene'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712 foreach my $gene_type (keys %$gene_dbs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713 if($self->is_enabled($gene_type)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714 my $db = $self->get_database($gene_dbs->{$gene_type});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715 if($db) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716 my $sa = $db->get_SliceAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717 push @gene_slices, $sa->fetch_by_name($slice->name());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 warning("A [". $gene_dbs->{$gene_type} ."] database must be " .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720 "attached to this SeqDumper\n(via a call to " .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 "attach_database) to retrieve genes of type [$gene_type]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 foreach my $gene_slice (@gene_slices) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 my @genes = @{$gene_slice->get_all_Genes(undef,undef, 1)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 while(my $gene = shift @genes) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729 $value = $self->features2location( [$gene] );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730 $self->write( @ff, 'gene', $value );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731 $self->write( @ff, "", '/gene='.$gene->stable_id() );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734 if(defined($gene->display_xref)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735 $self->write( @ff, "",'/locus_tag="'.$gene->display_xref->display_id.'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737 my $desc = $gene->description;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738 if(defined($desc) and $desc ne ""){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739 $desc =~ s/\"//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740 $self->write( @ff, "", '/note="'.$gene->description.'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745 foreach my $transcript (@{$gene->get_all_Transcripts}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746 my $translation = $transcript->translation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748 # normal transcripts get dumped differently than pseudogenes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749 if($translation) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750 #normal transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751 $value = $self->features2location($transcript->get_all_Exons);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752 $self->write(@ff, 'mRNA', $value);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753 $self->write(@ff,'' , '/gene="'.$gene->stable_id().'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754 $self->write(@ff,''
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755 ,'/note="transcript_id='.$transcript->stable_id().'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757 # ...and a CDS section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 $value =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759 $self->features2location($transcript->get_all_translateable_Exons);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 $self->write(@ff,'CDS', $value);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761 my $codon_start = $self->transcript_to_codon_start($transcript);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 $self->write(@ff,'' , qq{/codon_start="${codon_start}"}) if $codon_start > 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763 $self->write(@ff,'' , '/gene="'.$gene->stable_id().'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 $self->write(@ff,'' , '/protein_id="'.$translation->stable_id().'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765 $self->write(@ff,'' ,'/note="transcript_id='.$transcript->stable_id().'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767 foreach my $dbl (@{$transcript->get_all_DBLinks}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768 $value = '/db_xref="'.$dbl->dbname().':'.$dbl->display_id().'"';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769 $self->write(@ff, '', $value);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772 $value = '/translation="'.$transcript->translate()->seq().'"';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 $self->write(@ff, '', $value);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775 #pseudogene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776 $value = $self->features2location($transcript->get_all_Exons);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777 $self->write(@ff, 'misc_RNA', $value);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778 $self->write(@ff,'' , '/gene="'.$gene->stable_id().'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779 foreach my $dbl (@{$transcript->get_all_DBLinks}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780 $value = '/db_xref="'.$dbl->dbname().':'.$dbl->primary_id().'"';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781 $self->write(@ff, '', $value);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 $self->write(@ff,'' , '/note="'.$transcript->biotype().'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784 $self->write(@ff,''
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785 ,'/note="transcript_id='.$transcript->stable_id().'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790 # exons
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791 foreach my $gene (@{$gene_slice->get_all_Genes(undef,undef,1)}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 foreach my $exon (@{$gene->get_all_Exons}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793 $self->write(@ff,'exon', $self->features2location([$exon]));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 $self->write(@ff,'' , '/note="exon_id='.$exon->stable_id().'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800 # genscans
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802 if($self->is_enabled('genscan')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803 my @genscan_exons;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 my @transcripts = @{$slice->get_all_PredictionTranscripts(undef,1)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805 while(my $transcript = shift @transcripts) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806 my $exons = $transcript->get_all_Exons();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807 push @genscan_exons, @$exons;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808 $self->write(@ff, 'mRNA', $self->features2location($exons));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809 $self->write(@ff, '', '/product="'.$transcript->translate()->seq().'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810 $self->write(@ff, '', '/note="identifier='.$transcript->stable_id.'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811 $self->write(@ff, '', '/note="Derived by automated computational' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812 ' analysis using gene prediction method:' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813 $transcript->analysis->logic_name . '"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
817 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818 # snps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820 if($self->is_enabled('variation') && $slice->can('get_all_VariationFeatures')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821 # $slice->adaptor->db->add_db_adaptor('lite', $lite) if $lite;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823 my @variations = @{$slice->get_all_VariationFeatures()};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824 while(my $snp = shift @variations) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825 my $ss = $snp->start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826 my $se = $snp->end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827 #skip snps that hang off edge of slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828 next if($ss < 1 || $se > $slice->length);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830 $self->write(@ff, 'variation', "$ss..$se");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831 $self->write(@ff, '' , '/replace="'.$snp->allele_string.'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
832 #$self->write(@ff, '' , '/evidence="'.$snp->status.'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
833 my $rs_id = $snp->variation_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
834 my $db = $snp->source();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
835 # foreach my $link ($snp->each_DBLink) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
836 # my $id = $link->primary_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837 # my $db = $link->database;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838 $self->write(@ff, '', "/db_xref=\"$db:$rs_id\"");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
839 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
840 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
841
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842 # $slice->adaptor->db->remove_db_adaptor('lite') if $lite;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
843 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
844
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
845 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
846 # similarity features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
847 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
848 if($self->is_enabled('similarity')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
849 foreach my $sim (@{$slice->get_all_SimilarityFeatures}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
850 $self->write(@ff, 'misc_feature', $self->features2location([$sim]));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
851 $self->write(@ff, '' , '/note="match: '.$sim->hseqname.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
852 ' : '.$sim->hstart.'..'.$sim->hend.'('.$sim->hstrand.')"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
853 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
857 # repeats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
858 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859 if($self->is_enabled('repeat')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860 my @rfs = @{$slice->get_all_RepeatFeatures()};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862 while(my $repeat = shift @rfs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863 $self->write(@ff, 'repeat_region', $self->features2location([$repeat]));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864 $self->write(@ff, '' , '/note="' . $repeat->repeat_consensus->name.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865 ' repeat: matches ' . $repeat->hstart.'..'.$repeat->hend .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
866 '('.$repeat->hstrand.') of consensus"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
867 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
868
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
869 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
870
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
871 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
872 # markers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
873 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
874 if($self->is_enabled('marker') && $slice->can('get_all_MarkerFeatures')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
875 my @markers = @{$slice->get_all_MarkerFeatures()};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
876 while(my $mf = shift @markers) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
877 $self->write(@ff, 'STS', $self->features2location([$mf]));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
878 if($mf->marker->display_MarkerSynonym) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
879 $self->write(@ff, '' , '/standard_name="' .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
880 $mf->marker->display_MarkerSynonym->name . '"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
881 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
882
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
883
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
884 #grep out synonyms without a source
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
885 my @synonyms = @{$mf->marker->get_all_MarkerSynonyms};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
886 @synonyms = grep {$_->source } @synonyms;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
887 foreach my $synonym (@synonyms) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
888 $self->write(@ff, '', '/db_xref="'.$synonym->source.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
889 ':'.$synonym->name.'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
890 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
891 $self->write(@ff, '', '/note="map_weight='.$mf->map_weight.'"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
892 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
893 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
894
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
895 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
896 # contigs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
897 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
898 if($self->is_enabled('contig')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
899 foreach my $segment (@{$slice->project('seqlevel')}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
900 my ($start, $end, $slice) = @$segment;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
901 $self->write(@ff, 'misc_feature',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
902 $start .'..'. $end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
903 $self->write(@ff, '', '/note="contig '.$slice->seq_region_name .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
904 ' ' . $slice->start . '..' . $slice->end .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
905 '(' . $slice->strand . ')"');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
906 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
907 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
908
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
909 $slice->adaptor->db->add_db_adaptor('lite', $lite) if $lite;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
910
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
911 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
912
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
913 # /codon_start= is the first base to start translating from. This maps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
914 # Ensembl start exon phase to this concept. Here we present the usage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
915 # of phase in this concept where each row shows the sequence the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
916 # spliced_seq() method will return
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
917
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
918 # 123456789
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
919 # ATTATGACG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
920 # Phase == 0 ...+++### codon_start=0 // start from 1st A
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
921 # Phase == 1 ..+++### codon_start=3 // start from 2nd A (base 3 in the given spliced sequence)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
922 # Phase == 2 .+++### codon_start=2 // start from 2nd A (base 2 in the spliced sequence)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
923 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
924 # In the case of the final 2 phases we will generate a X codon
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
925 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
926
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
927 sub transcript_to_codon_start {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
928 my ($self, $transcript) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
929 my $start_phase = $transcript->start_Exon()->phase();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
930 return ( $start_phase == 1 ) ? 3 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
931 ( $start_phase == 2 ) ? 2 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
932 ( $start_phase == 0 ) ? 1 :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
933 -1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
934 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
935
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
936
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
937 =head2 dump_fasta
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
938
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
939 Arg [1] : Bio::EnsEMBL::Slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
940 Arg [2] : IO::File $FH
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
941 Example : $seq_dumper->dump_fasta($slice, $FH);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
942 Description: Dumps an FASTA flat file to an open file handle
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
943 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
944 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
945 Caller : dump
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
946
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
947 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
948
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
949 sub dump_fasta {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
950 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
951 my $slice = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
952 my $FH = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
953
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
954 my $id = $slice->seq_region_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
955 my $seqtype = 'dna';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
956 my $idtype = $slice->coord_system->name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
957 my $location = $slice->name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
958 my $start = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
959 my $end = $slice->length();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
960
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
961 my $header = ">$id $seqtype:$idtype $location\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
962 $self->print( $FH, $header );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
963
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
964 #set the formatting to FASTA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
965 my $FORMAT = '^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
966 ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
967
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
968 #chunk the sequence in 60kb chunks to use less memory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
969 my $cur = $start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
970 while($cur <= $end) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
971 my $to = $cur + 59_999;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
972 $to = $end if($to > $end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
973 my $seq = $slice->subseq($cur, $to);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
974 $cur = $to + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
975 $self->write($FH, $FORMAT, $seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
976 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
977 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
978
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
979
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
980
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
981 =head2 features2location
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
982
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
983 Arg [1] : listref of Bio::EnsEMBL::SeqFeatures
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
984 Example : $location = $self->features2location(\@features);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
985 Description: Constructs an EMBL location string from a list of features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
986 Returntype : string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
987 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
988 Caller : internal
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
989
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
990 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
991
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
992 sub features2location {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
993 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
994 my $features = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
995
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
996 my @join = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
997
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
998 foreach my $f (@$features) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
999 my $slice = $f->slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1000 my $start = $f->start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1001 my $end = $f->end();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1002 my $strand = $f->strand();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1003
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1004 if($start >= 1 && $end <= $slice->length) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1005 #this feature in on a slice and doesn't lie outside the boundary
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1006
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1007 if($strand == 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1008 push @join, "$start..$end";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1009 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1010 push @join, "complement($start..$end)";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1011 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1012 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1013 my @fs = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1014 #this feature is outside the boundary of the dump,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1015 # yet implemented and 'seqlevel' is guaranteed to be 1step
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1016 my $projection = $f->project('seqlevel');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1017 foreach my $segment (@$projection) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1018 my $slice = $segment->[2];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1019 my $slc_start = $slice->start();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1020 my $slc_end = $slice->end();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1021 my $seq_reg = $slice->seq_region_name();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1022 if($slice->strand == 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1023 push @join, "$seq_reg:$slc_start..$slc_end";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1024 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1025 push @join, "complement($seq_reg:$slc_start..$slc_end)";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1026 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1027 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1028 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1029 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1030
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1031 my $out = join ',', @join;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1032
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1033 if(scalar @join > 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1034 $out = "join($out)";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1035 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1036
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1037 return $out;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1038 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1039
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1040
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1041 sub _date_string {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1042 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1043
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1044 my ($sec, $min, $hour, $mday,$mon, $year) = localtime(time());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1045
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1046 my $month = ('JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1047 'AUG', 'SEP', 'OCT', 'NOV', 'DEC')[$mon];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1048 $year += 1900;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1049
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1050 return "$mday-$month-$year";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1051 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1052
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1053
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1054 sub write {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1055 my ($self, $FH, $FORMAT, @values) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1056
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1057 #while the last value still contains something
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1058 while(defined($values[-1]) and $values[-1] ne '') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1059 formline($FORMAT, @values);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1060 $self->print( $FH, $^A );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1061 $^A = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1062 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1063 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1064
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1065 sub write_genbank_seq {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1066 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1067 my $FH = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1068 my $seq = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1069 my $base_total = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1070
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1071 $base_total ||= 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1072
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1073 my $GENBANK_SEQ =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1074 '@>>>>>>>> ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<<~
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1075 ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1076
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1077 my $total = -59 + $base_total;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1078 #keep track of total and print lines of 60 bases with spaces every 10bp
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1079 while($$seq) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1080 $total += 60;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1081 formline($GENBANK_SEQ,$total, $$seq, $$seq, $$seq, $$seq, $$seq, $$seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1082 $self->print( $FH, $^A );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1083 $^A = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1084 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1085 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1086
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1087 sub write_embl_seq {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1088 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1089 my $FH = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1090 my $seq = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1091 my $base_total = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1092
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1093 $base_total ||= 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1094
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1095 my $EMBL_SEQ =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1096 ' ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<< ^<<<<<<<<<@>>>>>>>>>~
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1097 ';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1098 #keep track of total and print lines of 60 bases with spaces every 10bp
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1099 my $length = length($$seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1100 my $total = $length - $base_total;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1101 while($$seq) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1102 $total -= 60;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1103 $total = 0 if($total < 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1104 formline($EMBL_SEQ,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1105 $$seq, $$seq, $$seq, $$seq, $$seq, $$seq,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1106 $length - $total);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1107 $self->print( $FH, $^A );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1108 $^A = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1109 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1110 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1112 sub print {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1113 my( $self, $FH, $string ) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1114 if(!print $FH $string){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1115 print STDERR "Problem writing to disk\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1116 print STDERR "the string is $string\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1117 die "Could not write to file handle";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1118 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1119 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1120
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1121 1;