annotate variant_effect_predictor/Bio/EnsEMBL/Pipeline/Flatfile/DumpFile.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::Pipeline::Flatfile::DumpFile
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 The main workhorse of the Flatfile dumping pipeline.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
This module is responsible for creating the file names of the target
files, fetching the data from the database and formatting the flat file
headers. The final files are all gzipped at the normal level of compression.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 Allowed parameters are:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 =over 8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 =item species - The species to dump
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 =item base_path - The base of the dumps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 =item release - The current release we are emitting
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 =item type - The type of data we are emitting. Should be embl or genbank
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 =back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 package Bio::EnsEMBL::Pipeline::Flatfile::DumpFile;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 use base qw(Bio::EnsEMBL::Pipeline::Flatfile::Base);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
use Bio::EnsEMBL::Utils::Exception qw/throw/;
use Bio::EnsEMBL::Utils::SeqDumper;
use Bio::EnsEMBL::Utils::IO qw/gz_work_with_file work_with_file/;
use File::Path qw/rmtree/;
use File::Spec;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Default parameters for this module: 'supported_types' maps each flat file
# format that may be requested to a true value.
sub param_defaults {
  my ($self) = @_;
  my %supported_types = map { $_ => 1 } qw/embl genbank/;
  return { supported_types => \%supported_types };
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Validates the job parameters. Throws when 'type' is missing or is not a key
# of the 'supported_types' parameter, and when any of 'species', 'release' or
# 'base_path' is missing or false.
sub fetch_input {
  my ($self) = @_;

  my $type = $self->param('type');
  if (! $type) {
    throw("No type specified");
  }
  if (! $self->param('supported_types')->{$type}) {
    throw("Unsupported type '$type' specified");
  }

  # Checked in a fixed order so the first missing parameter is the one reported
  foreach my $required (qw/species release base_path/) {
    throw("Need a $required") unless $self->param($required);
  }

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Dumps every slice returned by get_Slices() to gzipped flat files of the
# requested type: all non-chromosomal slices share a single file, each
# chromosome slice is written to a file named after its coordinate system
# and seq region, and a README is written last. A data directory that
# already exists is removed before anything is written.
sub run {
  my ($self) = @_;

  my $dump_dir = $self->data_path();
  if (-d $dump_dir) {
    $self->info('Directory "%s" already exists; removing', $dump_dir);
    rmtree($dump_dir);
  }

  # The dumper method is looked up by name: dump_<type>
  my $dump_method = 'dump_' . $self->param('type');
  my $seq_dumper  = $self->_seq_dumper();

  # Split the slices by whether they report themselves as chromosomes
  my (@chromosomes, @non_chromosomes);
  foreach my $slice (@{ $self->get_Slices() }) {
    if ($slice->is_chromosome()) {
      push @chromosomes, $slice;
    }
    else {
      push @non_chromosomes, $slice;
    }
  }

  if (! @non_chromosomes) {
    $self->info('Did not find any non-chromosomal data');
  }
  else {
    my $path = $self->_generate_file_name('nonchromosomal');
    $self->info('Dumping non-chromosomal data to %s', $path);
    my $write_all = sub {
      my ($fh) = @_;
      foreach my $slice (@non_chromosomes) {
        $self->fine('Dumping non-chromosomal %s', $slice->name());
        $seq_dumper->$dump_method($slice, $fh);
      }
      return;
    };
    gz_work_with_file($path, 'w', $write_all);
  }

  foreach my $slice (@chromosomes) {
    $self->fine('Dumping chromosome %s', $slice->name());
    my $path = $self->_generate_file_name($slice->coord_system_name(), $slice->seq_region_name());

    # If the target file is already on disk, ask for it to be appended to
    my %gz_args;
    if (-f $path) {
      $self->fine('Path "%s" already exists; appending', $path);
      $gz_args{Append} = 1;
    }

    gz_work_with_file($path, 'w', sub {
      my ($fh) = @_;
      $seq_dumper->$dump_method($slice, $fh);
      return;
    }, \%gz_args);
  }

  $self->_create_README();

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Returns a new SeqDumper with the similarity, genscan, variation and repeat
# feature types disabled.
sub _seq_dumper {
  my ($self) = @_;
  my $dumper = Bio::EnsEMBL::Utils::SeqDumper->new();
  foreach my $feature_type (qw/similarity genscan variation repeat/) {
    $dumper->disable_feature_type($feature_type);
  }
  return $dumper;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Builds the full path of a dump file inside the data directory.
#
# Arguments:
#   $section - optional section of the dump, e.g. 'chromosome' or
#              'nonchromosomal'
#   $name    - optional name within the section, e.g. a seq region name
#
# Returns the path formed from data_path() and a file name of the form
#   <species>.<assembly>.<release>.<section.name|section>.dat.gz
# e.g. Homo_sapiens.GRCh37.64.chromosome.20.dat.gz
#      Homo_sapiens.GRCh37.64.nonchromosomal.dat.gz
sub _generate_file_name {
  my ($self, $section, $name) = @_;

  my @name_bits = (
    $self->web_name(),
    $self->assembly(),
    $self->param('release'),
  );

  # Test for a non-empty string rather than for truth, so that a legitimate
  # value of "0" (for instance a seq region called 0) stays in the file name
  # instead of being dropped. Undefined and empty values are still skipped.
  push @name_bits, grep { defined $_ && length $_ } ($section, $name);
  push @name_bits, 'dat', 'gz';

  my $file_name = join('.', @name_bits);
  return File::Spec->catfile($self->data_path(), $file_name);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Writes a plain-text file called README into the data directory. The text
# describes the flat file dumps and interpolates the value returned by
# scientific_name() and the upper-cased 'type' parameter.
sub _create_README {
  my ($self) = @_;
  my $species = $self->scientific_name();
  my $format  = uc( $self->param('type') );

  my $readme = <<"README";
#### README ####

IMPORTANT: Please note you can download correlation data tables,
supported by Ensembl, via the highly customisable BioMart and
EnsMart data mining tools. See http://www.ensembl.org/biomart/martview or
http://www.ebi.ac.uk/biomart/ for more information.

-----------------------
$format FLATFILE DUMPS
-----------------------
This directory contains $species $format flatfile dumps. To ease
downloading of the files, the $format format entries are bundled
into groups of chromosomes and non-chromosomal regions.
All files are then compacted with gzip.

Ensembl provides an automatic reannotation of $species genomic data.
These data will be dumped in a number of forms - one of them being
$format flat files. As the annotation of this form comes from Ensembl,
and not the original sequence entry, the two annotations are
likely to be different.

$format flat file format dumping provides all the confirmed protein coding
genes known by Ensembl. Considerably more information is stored in Ensembl:
the flat file just gives a representation which is compatible with
existing tools.

The main body of the entry gives the same information as is in the main
$format flat file entry.

* ID - the $format id
* AC - the EMBL/GenBank/DDBJ accession number (only the primary
accession number used)
* SV - The accession.version pair which gives the exact reference to
a particular sequence
* CC - comment lines to help you interpret the entry

Currently the following features are dumped into the feature table of
the Ensembl entry:

* Transcripts as CDS entries. Each transcript has the following
attributes attached
o Transcript id - a stable id, which Ensembl will attempt to
preserve as sensibly as possible during updates of the data
o Gene id - indication of the gene that this transcript belongs
to. gene ids are stable and preserved as sensibly as possible
during updates of the data
o Translation - the peptide translation of the transcript.
* Exons as exon entries. Each exon has the following information
o Exon id. The exon id is stable and preserved as sensibly
as possible during sequence updates
o start_phase. The phase of the splice site at the 5' end
of the exon. Phase 0 means between two codons, phase 1
means between the first and the second base of the codon
(meaning that there are 2 bases until the reading frame of
the exon) and phase 2 means between the second and the third
base of the codon (one base until the reading frame starts).
o end_phase. The phase of the splice site at the 3' end of the
exon: same definition as above (though of course, being end_phase,
the position relative to the exon's reading frame is different
for phase 1 and 2).

We are considering other information that should be made dumpable. In
general we would prefer people to use database access over flat file
access if you want to do something serious with the data.

README

  my $readme_path = File::Spec->catfile($self->data_path(), 'README');
  my $write_readme = sub {
    my ($fh) = @_;
    print {$fh} $readme;
    return;
  };
  work_with_file($readme_path, 'w', $write_readme);
  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253