annotate variant_effect_predictor/Bio/EnsEMBL/IdMapping/Archiver.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Bio::EnsEMBL::IdMapping::Archiver - create gene_archive and peptide_archive
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 my $archiver = Bio::EnsEMBL::IdMapping::Archiver->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 -LOGGER => $logger,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 -CONF => $conf,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 -CACHE => $cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 # create gene and peptide archive
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 $archiver->create_archive($mapping_session_id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 # dump existing archive tables to file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 my $num_entries =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 $archiver->dump_table_to_file( 'source', 'gene_archive',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 'gene_archive_existing.txt', 1 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 This module creates the gene_archive and peptide_archive
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 tables. Data is written to a file as tab-delimited text for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 loading into a MySQL database (this can be done manually, or using
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 StableIdmapper->upload_file_into_table()).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 An archive entry for a given source gene is created if no target
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 gene exists, or if any of its transcripts or their translations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 changed. Non-coding transcripts only have an entry in gene_archive (i.e.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 without a corresponding peptide_archive entry).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 create_archive
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 dump_gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 dump_tuple
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 dump_nc_row
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 mapping_session_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 package Bio::EnsEMBL::IdMapping::Archiver;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 no warnings 'uninitialized';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 use Bio::EnsEMBL::IdMapping::BaseObject;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 our @ISA = qw(Bio::EnsEMBL::IdMapping::BaseObject);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 use Bio::EnsEMBL::Utils::Exception qw(throw warning);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 use Digest::MD5 qw(md5_hex);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 # file-scoped state shared by all Archiver instances (not per-object): the next peptide_archive_id to assign
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 my $pa_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 create_archive

  Arg[1]      : Int $mapping_session_id - the mapping_session_id for this run
  Example     : $archiver->create_archive($stable_id_mapper->mapping_session_id);
  Description : Writes the data for the gene_archive and peptide_archive
                tables to the tab-delimited files 'gene_archive_new.txt' and
                'peptide_archive_new.txt'. New peptide_archive_ids continue
                after the highest peptide_archive_id found in the source db.
                The decision as to what to archive is deferred to
                dump_gene(), see documentation there for details.
  Return type : none
  Exceptions  : Thrown if one of the output files cannot be closed.
                A missing mapping_session_id is only logged as a warning.
  Caller      : id_mapping.pl
  Status      : At Risk
              : under development

=cut

sub create_archive {
  my $self = shift;
  my $mapping_session_id = shift;

  # argument check: a missing ID is only logged, execution continues
  unless ($mapping_session_id) {
    $self->logger->warning("No mapping_session_id set.");
  }

  $self->mapping_session_id($mapping_session_id);

  # get filehandles to write gene and peptide archive
  my $ga_fh = $self->get_filehandle('gene_archive_new.txt', 'tables');
  my $pa_fh = $self->get_filehandle('peptide_archive_new.txt', 'tables');

  # get the currently highest peptide_archive_id from the source db
  # ($pa_id is the file-scoped counter that dump_tuple() writes and increments)
  my $s_dba = $self->cache->get_DBAdaptor('source');
  my $s_dbh = $s_dba->dbc->db_handle;
  my $sql = qq(SELECT MAX(peptide_archive_id) FROM peptide_archive);
  $pa_id = $self->fetch_value_from_db($s_dbh, $sql);

  unless ($pa_id) {
    $self->logger->warning("No max(peptide_archive_id) found in db.\n", 1);
    $self->logger->info("That's ok if this is the first stable ID mapping for this species.\n", 1);
  }

  # first new ID is one past the existing maximum (1 if none was found)
  $pa_id++;
  $self->logger->debug("Starting with peptide_archive_id $pa_id.\n");

  # lookup hash of target gene stable IDs
  my %target_genes = map { $_->stable_id => $_ }
    values %{ $self->cache->get_by_name("genes_by_id", 'target') };

  # loop over source genes and dump to archive (dump_gene() will decide whether
  # to do full or partial dump); a source gene without a target gene of the
  # same stable ID is passed an undefined target
  foreach my $source_gene (values %{ $self->cache->get_by_name("genes_by_id",
                                                                'source') }) {

    $self->dump_gene($source_gene, $target_genes{$source_gene->stable_id},
      $ga_fh, $pa_fh);
  }

  # buffered write errors only surface when the handle is closed, so an
  # unchecked close() could leave a silently truncated archive file
  close($ga_fh) or throw("Unable to close gene_archive_new.txt: $!");
  close($pa_fh) or throw("Unable to close peptide_archive_new.txt: $!");
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 dump_gene

  Arg[1]      : Bio::EnsEMBL::IdMapping::TinyGene $s_gene - source gene
  Arg[2]      : Bio::EnsEMBL::IdMapping::TinyGene $t_gene - target gene
                (undefined if the source gene has no target gene)
  Arg[3]      : Filehandle $ga_fh - filehandle for writing gene_archive data
  Arg[4]      : Filehandle $pa_fh - filehandle for writing peptide_archive data
  Example     : my $target_gene = $gene_mappings{$source_gene->stable_id};
                $archiver->dump_gene($source_gene, $target_gene, $ga_fh, $pa_fh);
  Description : Given a source gene, writes a gene_archive and
                peptide_archive entry for each of its translating transcripts
                if no target gene exists, or if the target gene has no
                translating transcript with the same transcript stable ID,
                translation stable ID and peptide sequence.
                For source genes whose biotype matches /RNA/, transcripts
                without a translation get a gene_archive entry only, written
                if no target gene exists or if no target transcript has the
                same stable ID. Transcripts without a translation in any
                other gene are not archived.
  Return type : none
  Exceptions  : none
  Caller      : create_archive()
  Status      : At Risk
              : under development

=cut

sub dump_gene {
  my ($self, $s_gene, $t_gene, $ga_fh, $pa_fh) = @_;

  # private method, so no argument check done for performance reasons

  # deal with ncRNA differently
  # hope this simple biotype regex is accurate enough...
  my $is_ncRNA = ($s_gene->biotype =~ /RNA/) ? 1 : 0;

  # loop over source transcripts
  foreach my $s_tr (@{ $s_gene->get_all_Transcripts }) {
    my $s_tl = $s_tr->translation;

    # we got a coding transcript
    if ($s_tl) {

      # do a full dump if no target gene exists; otherwise only dump if no
      # target transcript/translation is identical to the source one
      my $changed_flag = 1;

      if ($t_gene) {
        foreach my $t_tr (@{ $t_gene->get_all_Transcripts }) {
          my $t_tl = $t_tr->translation;
          next unless ($t_tl);

          if (($s_tr->stable_id eq $t_tr->stable_id) and
              ($s_tl->stable_id eq $t_tl->stable_id) and
              ($s_tl->seq eq $t_tl->seq)) {
            $changed_flag = 0;
            # one identical target is enough, no need to look further
            last;
          }
        }
      }

      if ($changed_flag) {
        $self->dump_tuple($s_gene, $s_tr, $s_tl, $ga_fh, $pa_fh);
      }

    # now deal with ncRNAs (they don't translate but we still want to archive
    # them)
    } elsif ($is_ncRNA) {

      # without a translation only the transcript stable ID can be compared
      my $changed_flag = 1;

      if ($t_gene) {
        foreach my $t_tr (@{ $t_gene->get_all_Transcripts }) {
          if ($s_tr->stable_id eq $t_tr->stable_id) {
            $changed_flag = 0;
            last;
          }
        }
      }

      if ($changed_flag) {
        $self->dump_nc_row($s_gene, $s_tr, $ga_fh);
      }
    }
  }
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 dump_tuple

  Arg[1]      : Bio::EnsEMBL::IdMapping::TinyGene $gene - gene to archive
  Arg[2]      : Bio::EnsEMBL::IdMapping::TinyTranscript $tr - its transcript
  Arg[3]      : Bio::EnsEMBL::IdMapping::TinyTranslation $tl - its translation
  Arg[4]      : Filehandle $ga_fh - filehandle for writing gene_archive data
  Arg[5]      : Filehandle $pa_fh - filehandle for writing peptide_archive data
  Example     : $archive->dump_tuple($s_gene, $s_tr, $s_tl, $ga_fh, $pa_fh);
  Description : Writes one tab-delimited line to the gene_archive file and one
                to the peptide_archive file (ID, MD5 of the peptide sequence,
                peptide sequence), both using the current peptide_archive_id,
                then increments that ID.
  Return type : none
  Exceptions  : none
  Caller      : dump_gene()
  Status      : At Risk
              : under development

=cut

sub dump_tuple {
  my ($self, $gene, $tr, $tl, $ga_fh, $pa_fh) = @_;

  # private method, so no argument check done for performance reasons

  # gene archive: stable ID and version of gene, transcript and translation,
  # followed by the peptide_archive_id and the mapping session
  my @ga_fields = (
    $gene->stable_id, $gene->version,
    $tr->stable_id,   $tr->version,
    $tl->stable_id,   $tl->version,
    $pa_id,
    $self->mapping_session_id,
  );
  print {$ga_fh} join("\t", @ga_fields);
  print {$ga_fh} "\n";

  # peptide archive
  my $pep_seq = $tl->seq;
  my @pa_fields = ($pa_id, md5_hex($pep_seq), $pep_seq);
  print {$pa_fh} join("\t", @pa_fields);
  print {$pa_fh} "\n";

  # increment peptide_archive_id (file-scoped counter) for the next entry
  $pa_id++;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 dump_nc_row

  Arg[1]      : Bio::EnsEMBL::IdMapping::TinyGene $gene - gene to archive
  Arg[2]      : Bio::EnsEMBL::IdMapping::TinyTranscript $tr - its transcript
  Arg[3]      : Filehandle $ga_fh - filehandle for writing gene_archive data
  Example     : $archive->dump_nc_row($s_gene, $s_tr, $ga_fh);
  Description : Writes a tab-delimited gene_archive line for a non-coding
                transcript. The translation stable ID, translation version
                and peptide_archive_id columns are written as '\N'.
  Return type : none
  Exceptions  : none
  Caller      : dump_gene()
  Status      : At Risk
              : under development

=cut

sub dump_nc_row {
  my ($self, $gene, $tr, $ga_fh) = @_;

  # private method, so no argument check done for performance reasons

  # gene archive: there is no translation, so its three columns are '\N'
  my @fields = (
    $gene->stable_id, $gene->version,
    $tr->stable_id,   $tr->version,
    ('\N') x 3,
    $self->mapping_session_id,
  );
  print {$ga_fh} join("\t", @fields);
  print {$ga_fh} "\n";
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
=head2 mapping_session_id

  Arg[1]      : (optional) Int - mapping_session_id to set
  Example     : my $msi = $archiver->mapping_session_id;
  Description : Getter/setter for mapping_session_id. Any argument, including
                an explicit undef, overwrites the stored value.
  Return type : Int
  Exceptions  : none
  Caller      : create_archive()
  Status      : At Risk
              : under development

=cut

sub mapping_session_id {
  my ($self, @new_value) = @_;

  # only overwrite when the caller actually passed something
  if (@new_value) {
    $self->{'_mapping_session_id'} = $new_value[0];
  }

  return $self->{'_mapping_session_id'};
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334