annotate dir_plugins/MaxEntScan.pm @ 10:f594c6bed58f draft default tip

Uploaded
author dvanzessen
date Tue, 21 Apr 2020 11:40:19 +0000
parents e545d0a25ffe
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
1 =head1 LICENSE
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
2
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
4 Copyright [2016-2018] EMBL-European Bioinformatics Institute
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
5
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
6 Licensed under the Apache License, Version 2.0 (the "License");
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
7 you may not use this file except in compliance with the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
8 You may obtain a copy of the License at
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
9
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
10 http://www.apache.org/licenses/LICENSE-2.0
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
11
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
12 Unless required by applicable law or agreed to in writing, software
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
13 distributed under the License is distributed on an "AS IS" BASIS,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
15 See the License for the specific language governing permissions and
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
16 limitations under the License.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
17
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
18 =head1 CONTACT
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
19
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
20 Ensembl <http://www.ensembl.org/info/about/contact/index.html>
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
21
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
22 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
23
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
24 =head1 NAME
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
25
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
26 MaxEntScan
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
27
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
28 =head1 SYNOPSIS
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
29
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
30 mv MaxEntScan.pm ~/.vep/Plugins
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
31 ./vep -i variants.vcf --plugin MaxEntScan,/path/to/maxentscan/fordownload
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
32 ./vep -i variants.vcf --plugin MaxEntScan,/path/to/maxentscan/fordownload,SWA,NCSS
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
33
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
34 =head1 DESCRIPTION
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
35
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
36 This is a plugin for the Ensembl Variant Effect Predictor (VEP) that
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
37 runs MaxEntScan (http://genes.mit.edu/burgelab/maxent/Xmaxentscan_scoreseq.html)
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
38 to get splice site predictions.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
39
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
40 The plugin copies most of the code verbatim from the score5.pl and score3.pl
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
41 scripts provided in the MaxEntScan download. To run the plugin you must get and
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
42 unpack the archive from http://genes.mit.edu/burgelab/maxent/download/; the path
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
43 to this unpacked directory is then the param you pass to the --plugin flag.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
44
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
45 The plugin executes the logic from one of the scripts depending on which
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
46 splice region the variant overlaps:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
47
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
48 score5.pl : last 3 bases of exon --> first 6 bases of intron
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
49 score3.pl : last 20 bases of intron --> first 3 bases of exon
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
50
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
51 The plugin reports the reference, alternate and difference (REF - ALT) maximum
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
52 entropy scores.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
53
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
54 If 'SWA' is specified as a command-line argument, a sliding window algorithm
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
55 is applied to subsequences containing the reference and alternate alleles to
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
56 identify k-mers with the highest donor and acceptor splice site scores. To assess
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
57 the impact of variants, reference comparison scores are also provided. For SNVs,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
58 the comparison scores are derived from sequence in the same frame as the highest
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
59 scoring k-mers containing the alternate allele. For all other variants, the
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
60 comparison scores are derived from the highest scoring k-mers containing the
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
61 reference allele. The difference between the reference comparison and alternate
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
62 scores (SWA_REF_COMP - SWA_ALT) are also provided.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
63
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
64 If 'NCSS' is specified as a command-line argument, scores for the nearest
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
65 upstream and downstream canonical splice sites are also included.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
66
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
67 By default, only scores are reported. Add 'verbose' to the list of command-
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
68 line arguments to include the sequence output associated with those scores.
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
69
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
70 =cut
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
71
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
72 package MaxEntScan;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
73
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
74 use strict;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
75 use warnings;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
76
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
77 use Digest::MD5 qw(md5_hex);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
78
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
79 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
80 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(overlap);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
81
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
82 use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
83 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
84
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
85 # how many seq/score pairs to cache in memory
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
86 our $CACHE_SIZE = 50;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
87
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
88 sub new {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
89 my $class = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
90
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
91 my $self = $class->SUPER::new(@_);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
92
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
93 # we need sequence, so no offline mode unless we have FASTA
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
94 die("ERROR: cannot function in offline mode without a FASTA file\n") if $self->{config}->{offline} && !$self->{config}->{fasta};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
95
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
96 my $params = $self->params;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
97
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
98 my $dir = shift @$params;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
99 die("ERROR: MaxEntScan directory not specified\n") unless $dir;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
100 die("ERROR: MaxEntScan directory not found\n") unless -d $dir;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
101 $self->{_dir} = $dir;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
102
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
103 ## setup from score5.pl
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
104 $self->{'score5_me2x5'} = $self->score5_makescorematrix($dir.'/me2x5');
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
105 $self->{'score5_seq'} = $self->score5_makesequencematrix($dir.'/splicemodels/splice5sequences');
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
106
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
107 ## setup from score3.pl
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
108 $self->{'score3_metables'} = $self->score3_makemaxentscores;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
109
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
110 my %opts = map { $_ => undef } @$params;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
111
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
112 $self->{'run_SWA'} = 1 if exists $opts{'SWA'};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
113 $self->{'run_NCSS'} = 1 if exists $opts{'NCSS'};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
114
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
115 $self->{'verbose'} = 1 if exists $opts{'verbose'};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
116
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
117 return $self;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
118 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
119
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
120 sub feature_types {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
121 return ['Transcript'];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
122 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
123
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
124 sub get_header_info {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
125 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
126
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
127 my $v = $self->{'verbose'};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
128 my $headers = $self->get_MES_header_info($v);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
129
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
130 if ($self->{'run_SWA'}) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
131 my $swa_headers = $self->get_SWA_header_info($v);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
132 $headers = {%$headers, %$swa_headers};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
133 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
134
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
135 if ($self->{'run_NCSS'}) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
136 my $ncss_headers = $self->get_NCSS_header_info($v);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
137 $headers = {%$headers, %$ncss_headers};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
138 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
139
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
140 return $headers;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
141 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
142
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
143 sub get_MES_header_info {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
144 my ($self, $verbose) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
145
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
146 my $headers = {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
147 MaxEntScan_ref => "MaxEntScan reference sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
148 MaxEntScan_alt => "MaxEntScan alternate sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
149 MaxEntScan_diff => "MaxEntScan score difference",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
150 };
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
151
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
152 if ($verbose) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
153
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
154 $headers->{'MaxEntScan_ref_seq'} = "MaxEntScan reference sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
155 $headers->{'MaxEntScan_alt_seq'} = "MaxEntScan alternate sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
156 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
157
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
158 return $headers;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
159 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
160
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
161 sub get_SWA_header_info {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
162 my ($self, $verbose) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
163
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
164 my $headers = {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
165 "MES-SWA_donor_ref" => "Highest splice donor reference sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
166 "MES-SWA_donor_alt" => "Highest splice donor alternate sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
167 "MES-SWA_donor_ref_comp" => "Donor reference comparison sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
168 "MES-SWA_donor_diff" => "Difference between the donor reference comparison and alternate sequence scores",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
169
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
170 "MES-SWA_acceptor_ref" => "Highest splice acceptor reference sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
171 "MES-SWA_acceptor_alt" => "Highest splice acceptor alternate sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
172 "MES-SWA_acceptor_ref_comp" => "Acceptor reference comparison sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
173 "MES-SWA_acceptor_diff" => "Difference between the acceptor reference comparison and alternate sequence scores",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
174 };
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
175
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
176 if ($verbose) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
177
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
178 $headers->{'MES-SWA_donor_ref_seq'} = "Highest splice donor reference sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
179 $headers->{'MES-SWA_donor_ref_frame'} = "Position of the highest splice donor reference sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
180 $headers->{'MES-SWA_donor_ref_context'} = "Selected donor sequence context containing the reference allele";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
181 $headers->{'MES-SWA_donor_alt_seq'} = "Highest splice donor alternate sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
182 $headers->{'MES-SWA_donor_alt_frame'} = "Position of the highest splice donor alternate sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
183 $headers->{'MES-SWA_donor_alt_context'} = "Selected donor sequence context containing the alternate allele";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
184 $headers->{'MES-SWA_donor_ref_comp_seq'} = "Donor reference comparison sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
185
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
186 $headers->{'MES-SWA_acceptor_ref_seq'} = "Highest splice acceptor reference sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
187 $headers->{'MES-SWA_acceptor_ref_frame'} = "Position of the highest splice acceptor reference sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
188 $headers->{'MES-SWA_acceptor_ref_context'} = "Selected acceptor sequence context containing the reference allele";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
189 $headers->{'MES-SWA_acceptor_alt_seq'} = "Highest splice acceptor alternate sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
190 $headers->{'MES-SWA_acceptor_alt_frame'} = "Position of the highest splice acceptor alternate sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
191 $headers->{'MES-SWA_acceptor_alt_context'} = "Selected acceptor sequence context containing the alternate allele";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
192 $headers->{'MES-SWA_acceptor_ref_comp_seq'} = "Acceptor reference comparison sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
193 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
194
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
195 return $headers;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
196 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
197
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
198 sub get_NCSS_header_info {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
199 my ($self, $verbose) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
200
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
201 my $headers = {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
202 "MES-NCSS_upstream_acceptor" => "Nearest upstream canonical splice acceptor sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
203 "MES-NCSS_upstream_donor" => "Nearest upstream canonical splice donor sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
204
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
205 "MES-NCSS_downstream_acceptor" => "Nearest downstream canonical splice acceptor sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
206 "MES-NCSS_downstream_donor" => "Nearest downstream canonical splice donor sequence score",
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
207 };
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
208
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
209 if ($verbose) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
210
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
211 $headers->{'MES-NCSS_upstream_acceptor_seq'} = "Nearest upstream canonical splice acceptor sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
212 $headers->{'MES-NCSS_upstream_donor_seq'} = "Nearest upstream canonical splice donor sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
213
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
214 $headers->{'MES-NCSS_downstream_acceptor_seq'} = "Nearest downstream canonical splice acceptor sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
215 $headers->{'MES-NCSS_downstream_donor_seq'} = "Nearest downstream canonical splice donor sequence";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
216 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
217
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
218 return $headers;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
219 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
220
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
221 sub run {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
222 my ($self, $tva) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
223
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
224 my $seq_headers = $self->get_MES_header_info();
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
225 my $results = $self->run_MES($tva);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
226
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
227 if ($self->{'run_SWA'}) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
228 my $swa_seq_headers = $self->get_SWA_header_info();
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
229 $seq_headers = {%$seq_headers, %$swa_seq_headers};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
230 my $swa_results = $self->run_SWA($tva);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
231 $results = {%$results, %$swa_results};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
232 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
233
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
234 if ($self->{'run_NCSS'}) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
235 my $ncss_seq_headers = $self->get_NCSS_header_info();
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
236 $seq_headers = {%$seq_headers, %$ncss_seq_headers};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
237 my $ncss_results = $self->run_NCSS($tva);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
238 $results = {%$results, %$ncss_results};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
239 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
240
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
241 my %data;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
242
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
243 # add the scores
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
244 my @scores = grep { exists $results->{$_} } keys %$seq_headers;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
245 @data{@scores} = map { sprintf('%.3f', $_) } @{$results}{@scores};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
246
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
247 if ($self->{'verbose'}) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
248 # add any remaining results
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
249 my @non_scores = grep { ! exists $data{$_} } keys %$results;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
250 @data{@non_scores} = @{$results}{@non_scores};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
251 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
252
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
253 return \%data;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
254 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
255
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
256 sub run_MES {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
257 my ($self, $tva) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
258
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
259 my $vf = $tva->variation_feature;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
260 return {} unless $vf->{start} == $vf->{end} && $tva->feature_seq =~ /^[ACGT]$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
261
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
262 my $tv = $tva->transcript_variation;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
263 my $tr = $tva->transcript;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
264 my $tr_strand = $tr->strand;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
265 my ($vf_start, $vf_end) = ($vf->start, $vf->end);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
266
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
267 # use _overlapped_introns() method from BaseTranscriptVariation
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
268 # this will use an interval tree if available for superfast lookup of overlapping introns
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
269 # we have to expand the search space around $vf because we're looking for the splice region not the intron per se
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
270 foreach my $intron(@{$tv->_overlapped_introns($vf_start - 21, $vf_end + 21)}) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
271
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
272 # get coords depending on strand
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
273 # MaxEntScan does different predictions for 5 and 3 prime
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
274 # and we need to feed it different bits of sequence for each
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
275 #
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
276 # 5prime, 3 bases of exon, 6 bases of intron:
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
277 # ===------
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
278 #
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
279 # 3prime, 20 bases of intron, 3 bases of exon
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
280 # --------------------===
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
281
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
282 my ($five_start, $five_end, $three_start, $three_end);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
283
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
284 if($tr_strand > 0) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
285 ($five_start, $five_end) = ($intron->start - 3, $intron->start + 5);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
286 ($three_start, $three_end) = ($intron->end - 19, $intron->end + 3);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
287 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
288
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
289 else {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
290 ($five_start, $five_end) = ($intron->end - 5, $intron->end + 3);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
291 ($three_start, $three_end) = ($intron->start - 3, $intron->start + 19);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
292 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
293
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
294 if(overlap($vf->start, $vf->end, $five_start, $five_end)) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
295 my ($ref_seq, $alt_seq) = @{$self->get_seqs($tva, $five_start, $five_end)};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
296
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
297 return {} unless defined($ref_seq) && $ref_seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
298 return {} unless defined($alt_seq) && $alt_seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
299
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
300 my $ref_score = $self->score5($ref_seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
301 my $alt_score = $self->score5($alt_seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
302
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
303 return {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
304 MaxEntScan_ref => $ref_score,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
305 MaxEntScan_ref_seq => $ref_seq,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
306 MaxEntScan_alt => $alt_score,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
307 MaxEntScan_alt_seq => $alt_seq,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
308 MaxEntScan_diff => $ref_score - $alt_score,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
309 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
310 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
311
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
312 if(overlap($vf->start, $vf->end, $three_start, $three_end)) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
313 my ($ref_seq, $alt_seq) = @{$self->get_seqs($tva, $three_start, $three_end)};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
314
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
315 return {} unless defined($ref_seq) && $ref_seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
316 return {} unless defined($alt_seq) && $alt_seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
317
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
318 my $ref_score = $self->score3($ref_seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
319 my $alt_score = $self->score3($alt_seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
320
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
321 return {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
322 MaxEntScan_ref => $ref_score,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
323 MaxEntScan_ref_seq => $ref_seq,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
324 MaxEntScan_alt => $alt_score,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
325 MaxEntScan_alt_seq => $alt_seq,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
326 MaxEntScan_diff => $ref_score - $alt_score,
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
327 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
328 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
329 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
330
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
331 return {};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
332 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
333
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
334 sub run_SWA {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
335 my ($self, $tva) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
336
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
337 my $vf = $tva->variation_feature;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
338
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
339 my %results;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
340
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
341 # get the donor reference and alternate sequence contexts
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
342 my ($donor_ref_context, $donor_alt_context) = @{$self->get_seqs($tva, $vf->start - 8, $vf->end + 8)};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
343
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
344 if (defined($donor_ref_context)) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
345 $results{'MES-SWA_donor_ref_context'} = $donor_ref_context;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
346
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
347 if ($donor_ref_context =~ /^[ACGT]+$/) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
348 my ($seq, $frame, $score) = @{$self->get_max_donor($donor_ref_context)};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
349 $results{'MES-SWA_donor_ref_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
350 $results{'MES-SWA_donor_ref_frame'} = $frame;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
351 $results{'MES-SWA_donor_ref'} = $score;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
352 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
353 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
354
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
355 if (defined($donor_alt_context)) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
356 $results{'MES-SWA_donor_alt_context'} = $donor_alt_context;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
357
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
358 if ($donor_alt_context =~ /^[ACGT]+$/) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
359 my ($seq, $frame, $score) = @{$self->get_max_donor($donor_alt_context)};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
360 $results{'MES-SWA_donor_alt_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
361 $results{'MES-SWA_donor_alt_frame'} = $frame;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
362 $results{'MES-SWA_donor_alt'} = $score;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
363
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
364 if (defined(my $ref_comp_seq = $results{'MES-SWA_donor_ref_seq'})) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
365
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
366 if ($vf->{start} == $vf->{end} && $tva->feature_seq =~ /^[ACGT]$/) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
367 # for SNVs, compare to the same frame as the highest scoring ALT k-mer
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
368 $ref_comp_seq = substr($donor_ref_context, $frame - 1, 9);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
369 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
370
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
371 $results{'MES-SWA_donor_ref_comp_seq'} = $ref_comp_seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
372 $results{'MES-SWA_donor_ref_comp'} = $self->score5($ref_comp_seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
373
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
374 $results{'MES-SWA_donor_diff'} = $results{'MES-SWA_donor_ref_comp'} - $score;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
375 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
376 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
377 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
378
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
379 # get the acceptor reference and alternate sequence contexts
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
380 my ($acceptor_ref_context, $acceptor_alt_context) = @{$self->get_seqs($tva, $vf->start - 22, $vf->end + 22)};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
381
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
382 if (defined($acceptor_ref_context)) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
383 $results{'MES-SWA_acceptor_ref_context'} = $acceptor_ref_context;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
384
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
385 if ($acceptor_ref_context =~ /^[ACGT]+$/) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
386 my ($seq, $frame, $score) = @{$self->get_max_acceptor($acceptor_ref_context)};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
387 $results{'MES-SWA_acceptor_ref_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
388 $results{'MES-SWA_acceptor_ref_frame'} = $frame;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
389 $results{'MES-SWA_acceptor_ref'} = $score;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
390 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
391 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
392
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
393 if (defined($acceptor_alt_context)) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
394 $results{'MES-SWA_acceptor_alt_context'} = $acceptor_alt_context;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
395
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
396 if ($acceptor_alt_context =~ /^[ACGT]+$/) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
397 my ($seq, $frame, $score) = @{$self->get_max_acceptor($acceptor_alt_context)};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
398 $results{'MES-SWA_acceptor_alt_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
399 $results{'MES-SWA_acceptor_alt_frame'} = $frame;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
400 $results{'MES-SWA_acceptor_alt'} = $score;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
401
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
402 if (defined(my $ref_comp_seq = $results{'MES-SWA_acceptor_ref_seq'})) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
403
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
404 if ($vf->{start} == $vf->{end} && $tva->feature_seq =~ /^[ACGT]$/) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
405 # for SNVs, compare to the same frame as the highest scoring ALT k-mer
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
406 $ref_comp_seq = substr($acceptor_ref_context, $frame - 1, 23);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
407 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
408
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
409 $results{'MES-SWA_acceptor_ref_comp_seq'} = $ref_comp_seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
410 $results{'MES-SWA_acceptor_ref_comp'} = $self->score3($ref_comp_seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
411
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
412 $results{'MES-SWA_acceptor_diff'} = $results{'MES-SWA_acceptor_ref_comp'} - $score;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
413 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
414 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
415 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
416
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
417 return \%results;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
418 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
419
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
420 sub run_NCSS {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
421 my ($self, $tva) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
422
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
423 my $tv = $tva->transcript_variation;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
424 my $tr = $tva->transcript;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
425
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
426 my %results;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
427
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
428 if ($tv->intron_number) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
429
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
430 my ($intron_numbers, $total_introns) = split(/\//, $tv->intron_number);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
431 my $intron_number = (split(/-/, $intron_numbers))[0];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
432
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
433 my $introns = $tr->get_all_Introns;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
434
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
435 my $intron_idx = $intron_number - 1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
436 my $intron = $introns->[$intron_idx];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
437
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
438 if (defined(my $seq = $self->get_donor_seq_from_intron($intron))) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
439 $results{'MES-NCSS_upstream_donor_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
440 $results{'MES-NCSS_upstream_donor'} = $self->score5($seq) if $seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
441 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
442
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
443 if (defined(my $seq = $self->get_acceptor_seq_from_intron($intron))) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
444 $results{'MES-NCSS_downstream_acceptor_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
445 $results{'MES-NCSS_downstream_acceptor'} = $self->score3($seq) if $seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
446 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
447
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
448 # don't calculate an upstream acceptor score if the intron is the first in the transcript
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
449 unless ($intron_number == 1) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
450 my $upstream_intron = $introns->[$intron_idx - 1];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
451
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
452 if (defined(my $seq = $self->get_acceptor_seq_from_intron($upstream_intron))) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
453 $results{'MES-NCSS_upstream_acceptor_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
454 $results{'MES-NCSS_upstream_acceptor'} = $self->score3($seq) if $seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
455 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
456 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
457
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
458 # don't calculate a downstream donor score if the intron is the last in the transcript
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
459 unless ($intron_number == $total_introns) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
460 my $downstream_intron = $introns->[$intron_idx + 1];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
461
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
462 if (defined(my $seq = $self->get_donor_seq_from_intron($downstream_intron))) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
463 $results{'MES-NCSS_downstream_donor_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
464 $results{'MES-NCSS_downstream_donor'} = $self->score5($seq) if $seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
465 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
466 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
467 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
468
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
469 elsif ($tv->exon_number) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
470
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
471 my ($exon_numbers, $total_exons) = split(/\//, $tv->exon_number);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
472 my $exon_number = (split(/-/, $exon_numbers))[0];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
473
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
474 my $exons = $tr->get_all_Exons;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
475
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
476 my $exon_idx = $exon_number - 1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
477 my $exon = $exons->[$exon_idx];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
478
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
479 # don't calculate upstream scores if the exon is the first in the transcript
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
480 unless ($exon_number == 1) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
481 my $upstream_exon = $exons->[$exon_idx - 1];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
482
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
483 if (defined(my $seq = $self->get_donor_seq_from_exon($upstream_exon))) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
484 $results{'MES-NCSS_upstream_donor_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
485 $results{'MES-NCSS_upstream_donor'} = $self->score5($seq) if $seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
486 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
487
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
488 if (defined(my $seq = $self->get_acceptor_seq_from_exon($exon))) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
489 $results{'MES-NCSS_upstream_acceptor_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
490 $results{'MES-NCSS_upstream_acceptor'} = $self->score3($seq) if $seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
491 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
492 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
493
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
494 # don't calculate downstream scores if the exon is the last exon in the transcript
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
495 unless ($exon_number == $total_exons) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
496 my $downstream_exon = $exons->[$exon_idx + 1];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
497
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
498 if (defined(my $seq = $self->get_donor_seq_from_exon($exon))) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
499 $results{'MES-NCSS_downstream_donor_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
500 $results{'MES-NCSS_downstream_donor'} = $self->score5($seq) if $seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
501 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
502
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
503 if (defined(my $seq = $self->get_acceptor_seq_from_exon($downstream_exon))) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
504 $results{'MES-NCSS_downstream_acceptor_seq'} = $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
505 $results{'MES-NCSS_downstream_acceptor'} = $self->score3($seq) if $seq =~ /^[ACGT]+$/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
506 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
507 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
508 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
509
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
510 return \%results;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
511 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
512
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
513
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
514 ## Sliding window approach methods
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
515 ##################################
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
516
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
517 sub get_max_donor {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
518 my ($self, $sequence) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
519
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
520 my ($seq, $frame, $max);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
521 my @kmers = @{$self->sliding_window($sequence, 9)};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
522
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
523 for my $i (0 .. $#kmers) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
524 my $kmer = $kmers[$i];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
525 my $score = $self->score5($kmer);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
526 if(!$max || $score > $max) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
527 $seq = $kmer;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
528 $frame = $i + 1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
529 $max = $score;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
530 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
531 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
532 return [$seq, $frame, $max];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
533 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
534
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
535 sub get_max_acceptor {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
536 my ($self, $sequence) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
537
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
538 my ($seq, $frame, $max);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
539 my @kmers = @{$self->sliding_window($sequence, 23)};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
540
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
541 for my $i (0 .. $#kmers) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
542 my $kmer = $kmers[$i];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
543 my $score = $self->score3($kmer);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
544 if(!$max || $score > $max) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
545 $seq = $kmer;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
546 $frame = $i + 1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
547 $max = $score;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
548 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
549 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
550 return [$seq, $frame, $max];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
551 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
552
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
553 sub sliding_window {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
554 my ($self, $sequence, $winsize) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
555 my @seqs;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
556 for (my $i = 1; $i <= length($sequence) - $winsize + 1; $i++) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
557 push @seqs, substr($sequence, $i - 1, $winsize);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
558 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
559 return \@seqs;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
560 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
561
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
562
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
563 ## Nearest canonical splice site methods
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
564 ########################################
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
565
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
566 sub get_donor_seq_from_exon {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
567 my ($self, $exon) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
568
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
569 my ($start, $end);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
570
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
571 if ($exon->strand > 0) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
572 ($start, $end) = ($exon->end - 2, $exon->end + 6);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
573 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
574 else {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
575 ($start, $end) = ($exon->start - 6, $exon->start + 2);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
576 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
577
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
578 my $slice = $exon->slice()->sub_Slice($start, $end, $exon->strand);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
579 my $seq = $slice->seq() if defined($slice);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
580
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
581 return $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
582 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
583
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
584 sub get_acceptor_seq_from_exon {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
585 my ($self, $exon) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
586
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
587 my ($start, $end);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
588
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
589 if ($exon->strand > 0) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
590 ($start, $end) = ($exon->start - 20, $exon->start + 2);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
591 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
592 else {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
593 ($start, $end) = ($exon->end - 2, $exon->end + 20);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
594 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
595
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
596 my $slice = $exon->slice()->sub_Slice($start, $end, $exon->strand);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
597 my $seq = $slice->seq() if defined($slice);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
598
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
599 return $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
600 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
601
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
602 sub get_donor_seq_from_intron {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
603 my ($self, $intron) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
604
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
605 my ($start, $end);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
606
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
607 if ($intron->strand > 0) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
608 ($start, $end) = ($intron->start - 3, $intron->start + 5);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
609 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
610 else {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
611 ($start, $end) = ($intron->end - 5, $intron->end + 3);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
612 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
613
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
614 my $slice = $intron->slice()->sub_Slice($start, $end, $intron->strand);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
615 my $seq = $slice->seq() if defined($slice);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
616
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
617 return $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
618 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
619
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
620 sub get_acceptor_seq_from_intron {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
621 my ($self, $intron) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
622
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
623 my ($start, $end);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
624
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
625 if ($intron->strand > 0) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
626 ($start, $end) = ($intron->end - 19, $intron->end + 3);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
627 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
628 else {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
629 ($start, $end) = ($intron->start - 3, $intron->start + 19);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
630 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
631
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
632 my $slice = $intron->slice()->sub_Slice($start, $end, $intron->strand);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
633 my $seq = $slice->seq() if defined($slice);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
634
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
635 return $seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
636 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
637
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
638
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
639 ## Common methods
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
640 #################
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
641
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
642 sub get_seqs {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
643 my ($self, $tva, $start, $end) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
644 my $vf = $tva->variation_feature;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
645
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
646 my $tr_strand = $tva->transcript->strand;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
647
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
648 my $ref_slice = $vf->{slice}->sub_Slice($start, $end, $tr_strand);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
649
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
650 my ($ref_seq, $alt_seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
651
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
652 if (defined $ref_slice) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
653
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
654 $ref_seq = $alt_seq = $ref_slice->seq();
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
655
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
656 my $substr_start = $tr_strand > 0 ? $vf->{start} - $start : $end - $vf->{end};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
657 my $feature_seq = $tva->seq_length > 0 ? $tva->feature_seq : '';
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
658
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
659 substr($alt_seq, $substr_start, ($vf->{end} - $vf->{start}) + 1) = $feature_seq;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
660 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
661
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
662 return [$ref_seq, $alt_seq];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
663 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
664
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
665 sub score5 {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
666 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
667 my $seq = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
668 my $hex = md5_hex($seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
669
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
670 # check cache
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
671 if($self->{cache}) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
672 my ($res) = grep {$_->{hex} eq $hex} @{$self->{cache}->{score5}};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
673
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
674 return $res->{score} if $res;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
675 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
676
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
677 my $a = $self->score5_scoreconsensus($seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
678 die("ERROR: No score5_scoreconsensus\n") unless defined($a);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
679
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
680 my $b = $self->score5_getrest($seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
681 die("ERROR: No score5_getrest\n") unless defined($b);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
682
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
683 my $c = $self->{'score5_seq'}->{$b};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
684 die("ERROR: No score5_seq for $b\n") unless defined($c);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
685
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
686 my $d = $self->{'score5_me2x5'}->{$c};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
687 die("ERROR: No score5_me2x5 for $c\n") unless defined($d);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
688
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
689 my $score = $self->log2($a * $d);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
690
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
691 # cache it
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
692 push @{$self->{cache}->{score5}}, { hex => $hex, score => $score };
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
693 shift @{$self->{cache}->{score5}} while scalar @{$self->{cache}->{score5}} > $CACHE_SIZE;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
694
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
695 return $score;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
696 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
697
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
698 sub score3 {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
699 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
700 my $seq = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
701 my $hex = md5_hex($seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
702
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
703 # check cache
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
704 if($self->{cache}) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
705 my ($res) = grep {$_->{hex} eq $hex} @{$self->{cache}->{score3}};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
706
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
707 return $res->{score} if $res;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
708 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
709
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
710 my $a = $self->score3_scoreconsensus($seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
711 die("ERROR: No score3_scoreconsensus\n") unless defined($a);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
712
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
713 my $b = $self->score3_getrest($seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
714 die("ERROR: No score3_getrest\n") unless defined($b);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
715
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
716 my $c = $self->score3_maxentscore($b, $self->{'score3_metables'});
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
717 die("ERROR: No score3_maxentscore for $b\n") unless defined($c);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
718
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
719 my $score = $self->log2($a * $c);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
720
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
721 # cache it
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
722 push @{$self->{cache}->{score3}}, { hex => $hex, score => $score };
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
723 shift @{$self->{cache}->{score3}} while scalar @{$self->{cache}->{score3}} > $CACHE_SIZE;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
724
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
725 return $score;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
726 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
727
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
728
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
729 ## methods copied from score5.pl
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
730 ################################
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
731
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
732 sub score5_makesequencematrix {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
733 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
734 my $file = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
735 my %matrix;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
736 my $n=0;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
737 open(SCOREF, $file) || die "Can't open $file!\n";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
738 while(<SCOREF>) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
739 chomp;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
740 $_=~ s/\s//;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
741 $matrix{$_} = $n;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
742 $n++;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
743 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
744 close(SCOREF);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
745 return \%matrix;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
746 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
747
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
748 sub score5_makescorematrix {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
749 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
750 my $file = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
751 my %matrix;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
752 my $n=0;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
753 open(SCOREF, $file) || die "Can't open $file!\n";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
754 while(<SCOREF>) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
755 chomp;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
756 $_=~ s/\s//;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
757 $matrix{$n} = $_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
758 $n++;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
759 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
760 close(SCOREF);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
761 return \%matrix;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
762 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
763
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
764 sub score5_getrest {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
765 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
766 my $seq = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
767 my @seqa = split(//,uc($seq));
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
768 return $seqa[0].$seqa[1].$seqa[2].$seqa[5].$seqa[6].$seqa[7].$seqa[8];
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
769 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
770
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
771 sub score5_scoreconsensus {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
772 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
773 my $seq = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
774 my @seqa = split(//,uc($seq));
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
775 my %bgd;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
776 $bgd{'A'} = 0.27;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
777 $bgd{'C'} = 0.23;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
778 $bgd{'G'} = 0.23;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
779 $bgd{'T'} = 0.27;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
780 my %cons1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
781 $cons1{'A'} = 0.004;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
782 $cons1{'C'} = 0.0032;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
783 $cons1{'G'} = 0.9896;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
784 $cons1{'T'} = 0.0032;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
785 my %cons2;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
786 $cons2{'A'} = 0.0034;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
787 $cons2{'C'} = 0.0039;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
788 $cons2{'G'} = 0.0042;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
789 $cons2{'T'} = 0.9884;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
790 my $addscore = $cons1{$seqa[3]}*$cons2{$seqa[4]}/($bgd{$seqa[3]}*$bgd{$seqa[4]});
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
791 return $addscore;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
792 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
793
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
794 sub log2 {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
795 my ($self, $val) = @_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
796 return log($val)/log(2);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
797 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
798
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
799
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
800 ## methods copied from score3.pl
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
801 ################################
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
802
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
803 sub score3_hashseq {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
804 #returns hash of sequence in base 4
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
805 # $self->score3_hashseq('CAGAAGT') returns 4619
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
806 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
807 my $seq = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
808 $seq = uc($seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
809 $seq =~ tr/ACGT/0123/;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
810 my @seqa = split(//,$seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
811 my $sum = 0;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
812 my $len = length($seq);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
813 my @four = (1,4,16,64,256,1024,4096,16384);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
814 my $i=0;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
815 while ($i<$len) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
816 $sum+= $seqa[$i] * $four[$len - $i -1] ;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
817 $i++;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
818 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
819 return $sum;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
820 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
821
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
822 sub score3_makemaxentscores {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
823 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
824 my $dir = $self->{'_dir'}."/splicemodels/";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
825 my @list = ('me2x3acc1','me2x3acc2','me2x3acc3','me2x3acc4',
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
826 'me2x3acc5','me2x3acc6','me2x3acc7','me2x3acc8','me2x3acc9');
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
827 my @metables;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
828 my $num = 0 ;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
829 foreach my $file (@list) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
830 my $n = 0;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
831 open (SCOREF,"<".$dir.$file) || die "Can't open $file!\n";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
832 while(<SCOREF>) {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
833 chomp;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
834 $_=~ s/\s//;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
835 $metables[$num]{$n} = $_;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
836 $n++;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
837 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
838 close(SCOREF);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
839 #print STDERR $file."\t".$num."\t".$n."\n";
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
840 $num++;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
841 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
842 return \@metables;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
843 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
844
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
845 sub score3_maxentscore {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
846 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
847 my $seq = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
848 my $table_ref = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
849 my @metables = @$table_ref;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
850 my @sc;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
851 $sc[0] = $metables[0]{$self->score3_hashseq(substr($seq,0,7))};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
852 $sc[1] = $metables[1]{$self->score3_hashseq(substr($seq,7,7))};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
853 $sc[2] = $metables[2]{$self->score3_hashseq(substr($seq,14,7))};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
854 $sc[3] = $metables[3]{$self->score3_hashseq(substr($seq,4,7))};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
855 $sc[4] = $metables[4]{$self->score3_hashseq(substr($seq,11,7))};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
856 $sc[5] = $metables[5]{$self->score3_hashseq(substr($seq,4,3))};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
857 $sc[6] = $metables[6]{$self->score3_hashseq(substr($seq,7,4))};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
858 $sc[7] = $metables[7]{$self->score3_hashseq(substr($seq,11,3))};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
859 $sc[8] = $metables[8]{$self->score3_hashseq(substr($seq,14,4))};
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
860 my $finalscore = $sc[0] * $sc[1] * $sc[2] * $sc[3] * $sc[4] / ($sc[5] * $sc[6] * $sc[7] * $sc[8]);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
861 return $finalscore;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
862 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
863
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
864 sub score3_getrest {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
865 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
866 my $seq = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
867 my $seq_noconsensus = substr($seq,0,18).substr($seq,20,3);
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
868 return $seq_noconsensus;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
869 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
870
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
871 sub score3_scoreconsensus {
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
872 my $self = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
873 my $seq = shift;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
874 my @seqa = split(//,uc($seq));
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
875 my %bgd;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
876 $bgd{'A'} = 0.27;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
877 $bgd{'C'} = 0.23;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
878 $bgd{'G'} = 0.23;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
879 $bgd{'T'} = 0.27;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
880 my %cons1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
881 $cons1{'A'} = 0.9903;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
882 $cons1{'C'} = 0.0032;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
883 $cons1{'G'} = 0.0034;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
884 $cons1{'T'} = 0.0030;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
885 my %cons2;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
886 $cons2{'A'} = 0.0027;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
887 $cons2{'C'} = 0.0037;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
888 $cons2{'G'} = 0.9905;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
889 $cons2{'T'} = 0.0030;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
890 my $addscore = $cons1{$seqa[18]} * $cons2{$seqa[19]}/ ($bgd{$seqa[18]} * $bgd{$seqa[19]});
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
891 return $addscore;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
892 }
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
893
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
894 1;
e545d0a25ffe Uploaded
dvanzessen
parents:
diff changeset
895