annotate variant_effect_predictor/Bio/EnsEMBL/Variation/Utils/VEP.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 # EnsEMBL module for Bio::EnsEMBL::Variation::Utils::Sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 Bio::EnsEMBL::Variation::Utils::VEP - Methods used by the Variant Effect Predictor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 use Bio::EnsEMBL::Variation::Utils::VEP qw(configure);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 my $config = configure();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 =head1 METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 package Bio::EnsEMBL::Variation::Utils::VEP;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 # module list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 use Getopt::Long;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 use FileHandle;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 use File::Path qw(make_path);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 use Storable qw(nstore_fd fd_retrieve freeze thaw);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 use Scalar::Util qw(weaken);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 use Digest::MD5 qw(md5_hex);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 use Bio::EnsEMBL::Registry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 use Bio::EnsEMBL::Variation::VariationFeature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 use Bio::EnsEMBL::Variation::DBSQL::VariationFeatureAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 use Bio::EnsEMBL::Variation::Utils::VariationEffect qw(MAX_DISTANCE_FROM_TRANSCRIPT overlap);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 use Bio::EnsEMBL::Variation::Utils::Sequence qw(unambiguity_code);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 use Bio::EnsEMBL::Variation::Utils::EnsEMBL2GFF3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 use Bio::EnsEMBL::Variation::StructuralVariationFeature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 use Bio::EnsEMBL::Variation::DBSQL::StructuralVariationFeatureAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 use Bio::EnsEMBL::Variation::TranscriptStructuralVariation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 # we need to manually include all these modules for caching to work
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 use Bio::EnsEMBL::CoordSystem;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 use Bio::EnsEMBL::Transcript;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 use Bio::EnsEMBL::Translation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 use Bio::EnsEMBL::Exon;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 use Bio::EnsEMBL::ProteinFeature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 use Bio::EnsEMBL::Analysis;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 use Bio::EnsEMBL::DBSQL::GeneAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 use Bio::EnsEMBL::DBSQL::SliceAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 use Bio::EnsEMBL::DBSQL::TranslationAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 use Bio::EnsEMBL::DBSQL::TranscriptAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 use Bio::EnsEMBL::DBSQL::MetaContainer;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 use Bio::EnsEMBL::DBSQL::CoordSystemAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 use Exporter;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 use vars qw(@ISA @EXPORT_OK);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 @ISA = qw(Exporter);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 # open socket pairs for cross-process comms
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 use Socket;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 socketpair(CHILD, PARENT, AF_UNIX, SOCK_STREAM, PF_UNSPEC) or die "ERROR: Failed to open socketpair: $!";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 CHILD->autoflush(1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 PARENT->autoflush(1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 @EXPORT_OK = qw(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 &parse_line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 &vf_to_consequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 &validate_vf
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 &read_cache_info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 &dump_adaptor_cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 &load_dumped_adaptor_cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 &load_dumped_variation_cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 &get_all_consequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 &get_slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 &build_slice_cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 &build_full_cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 &regions_from_hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 &get_time
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 &debug
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 &convert_to_vcf
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 &progress
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 &end_progress
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 @REG_FEAT_TYPES
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 @OUTPUT_COLS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 @VEP_WEB_CONFIG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 %FILTER_SHORTCUTS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 our @OUTPUT_COLS = qw(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 Uploaded_variation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 Location
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 Allele
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 Gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 Feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 Feature_type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 Consequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 cDNA_position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 CDS_position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 Protein_position
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 Amino_acids
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 Codons
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 Existing_variation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 Extra
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 our @REG_FEAT_TYPES = qw(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 RegulatoryFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 MotifFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 our @VEP_WEB_CONFIG = qw(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 check_existing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 coding_only
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 core_type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 hgnc
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 protein
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 hgvs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 terms
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 check_frequency
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 freq_filter
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 freq_gt_lt
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 freq_freq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 freq_pop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 sift
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 polyphen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 regulatory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 our %FILTER_SHORTCUTS = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 upstream => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 '5KB_upstream_variant' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 '2KB_upstream_variant' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 downstream => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 '5KB_downstream_variant' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 '2KB_downstream_variant' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 '500B_downstream_variant' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 utr => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 '5_prime_UTR_variant' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 '3_prime_UTR_variant' => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 splice => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 splice_donor_variant => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 splice_acceptor_variant => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 splice_region_variant => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 coding_change => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 stop_lost => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 stop_gained => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 missense_variant => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 frameshift_variant => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 inframe_insertion => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 inframe_deletion => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 regulatory => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 regulatory_region_variant => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 TF_binding_site_variant => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 # parses a line of input, returns VF object(s)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 sub parse_line {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 my $line = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 # find out file format - will only do this on first line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 if(!defined($config->{format}) || (defined($config->{format}) && $config->{format} eq 'guess')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 $config->{format} = &detect_format($line);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 debug("Detected format of input file as ", $config->{format}) unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 # HGVS and ID formats need DB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 die("ERROR: Can't use ".uc($config->{format})." format in offline mode") if $config->{format} =~ /id|hgvs/ && defined($config->{offline});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 # force certain options if format is VEP output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 if($config->{format} eq 'vep') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 $config->{no_consequence} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 delete $config->{regulatory};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 debug("Forcing no consequence calculation") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 # check that format is vcf when using --individual
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 die("ERROR: --individual only compatible with VCF input files\n") if defined($config->{individual}) && $config->{format} ne 'vcf';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 my $parse_method = 'parse_'.$config->{format};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 $parse_method =~ s/vep_//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 my $method_ref = \&$parse_method;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 my $vfs = &$method_ref($config, $line);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 $vfs = add_lrg_mappings($config, $vfs) if defined($config->{lrg});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 return $vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 # sub-routine to detect format of input
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 sub detect_format {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 my $line = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 my @data = split /\s+/, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 # HGVS: ENST00000285667.3:c.1047_1048insC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 if (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 scalar @data == 1 &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 $data[0] =~ /^([^\:]+)\:.*?([cgmrp]?)\.?([\*\-0-9]+.*)$/i
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 return 'hgvs';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 # variant identifier: rs123456
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 elsif (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 scalar @data == 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 return 'id';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 # VCF: 20 14370 rs6054257 G A 29 0 NS=58;DP=258;AF=0.786;DB;H2 GT:GQ:DP:HQ
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 elsif (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 $data[0] =~ /(chr)?\w+/ &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 $data[1] =~ /^\d+$/ &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 $data[3] =~ /^[ACGTN-]+$/i &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 $data[4] =~ /^([\.ACGTN-]+\,?)+$/i
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 return 'vcf';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 # pileup: chr1 60 T A
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 elsif (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 $data[0] =~ /(chr)?\w+/ &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 $data[1] =~ /^\d+$/ &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 $data[2] =~ /^[\*ACGTN-]+$/i &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 $data[3] =~ /^[\*ACGTNRYSWKM\+\/-]+$/i
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 return 'pileup';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 # ensembl: 20 14370 14370 A/G +
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 elsif (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 $data[0] =~ /\w+/ &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 $data[1] =~ /^\d+$/ &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 $data[2] =~ /^\d+$/ &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 $data[3] =~ /[ACGTN-]+\/[ACGTN-]+/i
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 return 'ensembl';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 # vep output: ID 1:142849179 - - - - INTERGENIC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 elsif (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 $data[0] =~ /\w+/ &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 $data[1] =~ /^\w+?\:\d+(\-\d+)*$/ &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 scalar @data == 14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 return 'vep';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 die("ERROR: Could not detect input file format\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 # parse a line of Ensembl format input into a variation feature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 sub parse_ensembl {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 my $line = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 my ($chr, $start, $end, $allele_string, $strand, $var_name) = split /\s+/, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 my $vf = Bio::EnsEMBL::Variation::VariationFeature->new_fast({
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 start => $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 end => $end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 allele_string => $allele_string,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 strand => $strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 map_weight => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 adaptor => $config->{vfa},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 variation_name => $var_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 chr => $chr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 return [$vf];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 # parse a line of VCF input into a variation feature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 sub parse_vcf {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 my $line = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 my @data = split /\s+/, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 # non-variant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 my $non_variant = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 if($data[4] eq '.') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 if(defined($config->{allow_non_variant})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 $non_variant = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 return [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 # get relevant data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 my ($chr, $start, $end, $ref, $alt) = ($data[0], $data[1], $data[1], $data[3], $data[4]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 # some VCF files have a GRCh37 pos defined in GP flag in INFO column
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 # if user has requested, we can use that as the position instead
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 if(defined $config->{gp}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 $chr = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 $start = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 foreach my $pair(split /\;/, $data[7]) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 my ($key, $value) = split /\=/, $pair;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 if($key eq 'GP') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 ($chr, $start) = split /\:/, $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 $end = $start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 unless(defined($chr) and defined($start)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 warn "No GP flag found in INFO column" unless defined $config->{quiet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 return [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 # adjust end coord
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 $end += (length($ref) - 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 # structural variation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 if((defined($data[7]) && $data[7] =~ /SVTYPE/) || $alt =~ /\<|\[|\]|\>/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 # parse INFO field
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 my %info = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 foreach my $bit(split /\;/, $data[7]) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 my ($key, $value) = split /\=/, $bit;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 $info{$key} = $value;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 # like indels, SVs have the base before included for reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 $start++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 # work out the end coord
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 if(defined($info{END})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 $end = $info{END};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 elsif(defined($info{SVLEN})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 $end = $start + abs($info{SVLEN}) - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 # check for imprecise breakpoints
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 my ($min_start, $max_start, $min_end, $max_end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 if(defined($info{CIPOS})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 my ($low, $high) = split /\,/, $info{CIPOS};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 $min_start = $start + $low;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 $max_start = $start + $high;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 if(defined($info{CIEND})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 my ($low, $high) = split /\,/, $info{CIEND};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 $min_end = $end + $low;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 $max_end = $end + $high;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 # get type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 my $type = $info{SVTYPE};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 my $so_term;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 if(defined($type)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 # convert to SO term
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 my %terms = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 INS => 'insertion',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 DEL => 'deletion',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 TDUP => 'tandem_duplication',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 DUP => 'duplication'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 $so_term = defined $terms{$type} ? $terms{$type} : $type;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 my $svf = Bio::EnsEMBL::Variation::StructuralVariationFeature->new_fast({
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 start => $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 inner_start => $max_start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 outer_start => $min_start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 end => $end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 inner_end => $min_end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 outer_end => $max_end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 strand => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 adaptor => $config->{svfa},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 variation_name => $data[2] eq '.' ? undef : $data[2],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 chr => $chr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 class_SO_term => $so_term,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 return [$svf];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 # normal variation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 # find out if any of the alt alleles make this an insertion or a deletion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 my ($is_indel, $is_sub, $ins_count, $total_count);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 foreach my $alt_allele(split /\,/, $alt) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 $is_indel = 1 if $alt_allele =~ /D|I/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 $is_indel = 1 if length($alt_allele) != length($ref);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 $is_sub = 1 if length($alt_allele) == length($ref);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 $ins_count++ if length($alt_allele) > length($ref);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 $total_count++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 # multiple alt alleles?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 if($alt =~ /\,/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 if($is_indel) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 my @alts;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 if($alt =~ /D|I/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 foreach my $alt_allele(split /\,/, $alt) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 # deletion (VCF <4)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 if($alt_allele =~ /D/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 push @alts, '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 elsif($alt_allele =~ /I/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 $alt_allele =~ s/^I//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 push @alts, $alt_allele;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 $ref = substr($ref, 1) || '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 $start++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 foreach my $alt_allele(split /\,/, $alt) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 $alt_allele = substr($alt_allele, 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 $alt_allele = '-' if $alt_allele eq '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 push @alts, $alt_allele;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 $alt = join "/", @alts;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 # for substitutions we just need to replace ',' with '/' in $alt
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 $alt =~ s/\,/\//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 elsif($is_indel) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 # deletion (VCF <4)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 if($alt =~ /D/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 my $num_deleted = $alt;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 $num_deleted =~ s/\D+//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 $end += $num_deleted - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 $alt = "-";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 $ref .= ("N" x ($num_deleted - 1)) unless length($ref) > 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 # insertion (VCF <4)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 elsif($alt =~ /I/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 $ref = '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 $alt =~ s/^I//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 $start++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 # insertion or deletion (VCF 4+)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 elsif(substr($ref, 0, 1) eq substr($alt, 0, 1)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 # chop off first base
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 $ref = substr($ref, 1) || '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 $alt = substr($alt, 1) || '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 $start++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 # create VF object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 my $vf = Bio::EnsEMBL::Variation::VariationFeature->new_fast({
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 start => $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 end => $end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504 allele_string => $non_variant ? $ref : $ref.'/'.$alt,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 strand => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 map_weight => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 adaptor => $config->{vfa},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 variation_name => $data[2] eq '.' ? undef : $data[2],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 chr => $chr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 # flag as non-variant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 $vf->{non_variant} = 1 if $non_variant;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 # individuals?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516 if(defined($config->{individual})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 my @alleles = split /\//, $ref.'/'.$alt;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 my @return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 foreach my $ind(keys %{$config->{ind_cols}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 # get alleles present in this individual
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 my @bits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 my $gt = (split /\:/, $data[$config->{ind_cols}->{$ind}])[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 my $phased = ($gt =~ /\|/ ? 1 : 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 foreach my $bit(split /\||\/|\\/, $gt) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 push @bits, $alleles[$bit] unless $bit eq '.';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 # shallow copy VF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 my $vf_copy = { %$vf };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 bless $vf_copy, ref($vf);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 # get non-refs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 my %non_ref = map {$_ => 1} grep {$_ ne $ref} @bits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 # construct allele_string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 if(scalar keys %non_ref) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 $vf_copy->{allele_string} = $ref."/".(join "/", keys %non_ref);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 $vf_copy->{allele_string} = $ref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 $vf_copy->{non_variant} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 # store phasing info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 $vf_copy->{phased} = defined($config->{phased} ? 1 : $phased);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 # store GT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553 $vf_copy->{genotype} = \@bits;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 # store individual name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 $vf_copy->{individual} = $ind;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 push @return, $vf_copy;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 return \@return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 return [$vf];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569 # parse a line of pileup input into variation feature objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 sub parse_pileup {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572 my $line = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574 my @data = split /\s+/, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 # pileup can produce more than one VF per line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 my @return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 # normal variant
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 if($data[2] ne "*"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 my $var;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 if($data[3] =~ /^[A|C|G|T]$/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 $var = $data[3];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 ($var = unambiguity_code($data[3])) =~ s/$data[2]//ig;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 for my $alt(split //, $var){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591 push @return, Bio::EnsEMBL::Variation::VariationFeature->new_fast({
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592 start => $data[1],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 end => $data[1],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594 allele_string => $data[2].'/'.$alt,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 strand => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596 map_weight => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 adaptor => $config->{vfa},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598 chr => $data[0],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603 # in/del
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605 my %tmp_hash = map {$_ => 1} split /\//, $data[3];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606 my @genotype = keys %tmp_hash;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608 foreach my $allele(@genotype){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609 if(substr($allele,0,1) eq "+") { #ins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610 push @return, Bio::EnsEMBL::Variation::VariationFeature->new_fast({
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611 start => $data[1] + 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612 end => $data[1],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613 allele_string => '-/'.substr($allele, 1),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614 strand => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615 map_weight => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 adaptor => $config->{vfa},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617 chr => $data[0],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 elsif(substr($allele,0,1) eq "-"){ #del
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621 push @return, Bio::EnsEMBL::Variation::VariationFeature->new_fast({
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622 start => $data[1] + 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 end => $data[1] + length(substr($allele, 1)),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 allele_string => substr($allele, 1).'/-',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625 strand => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 map_weight => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627 adaptor => $config->{vfa},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 chr => $data[0],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631 elsif($allele ne "*"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 warn("WARNING: invalid pileup indel genotype: $line\n") unless defined $config->{quiet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637 return \@return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640 # parse a line of HGVS input into a variation feature object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 sub parse_hgvs {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 my $line = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 my $vf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 # not all hgvs notations are supported yet, so we have to wrap it in an eval
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648 eval { $vf = $config->{vfa}->fetch_by_hgvs_notation($line, $config->{sa}, $config->{ta}) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650 if((!defined($vf) || (defined $@ && length($@) > 1)) && defined($config->{coordinator})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651 eval { $vf = $config->{vfa}->fetch_by_hgvs_notation($line, $config->{ofsa}, $config->{ofta}) };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654 if(!defined($vf) || (defined $@ && length($@) > 1)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655 warn("WARNING: Unable to parse HGVS notation \'$line\'\n$@") unless defined $config->{quiet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656 return [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 # get whole chromosome slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660 my $slice = $vf->slice->adaptor->fetch_by_region($vf->slice->coord_system->name, $vf->slice->seq_region_name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662 $vf = $vf->transfer($slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664 # name it after the HGVS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665 $vf->{variation_name} = $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667 # add chr attrib
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668 $vf->{chr} = $vf->slice->seq_region_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670 return [$vf];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673 # parse a variation identifier e.g. a dbSNP rsID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674 sub parse_id {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676 my $line = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 my $v_obj = $config->{va}->fetch_by_name($line);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 return [] unless defined $v_obj;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682 my @vfs = @{$v_obj->get_all_VariationFeatures};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683 delete $_->{dbID} for @vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684 delete $_->{overlap_consequences} for @vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685 $_->{chr} = $_->seq_region_name for @vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687 return \@vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690 # parse a line of VEP output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 sub parse_vep {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693 my $line = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695 my @data = split /\t/, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697 my ($chr, $start, $end) = split /\:|\-/, $data[1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698 $end ||= $start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700 # might get allele string from ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 my $allele_string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703 if($data[0] =~ /^\w\_\w\_\w$/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704 my @split = split /\_/, $data[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705 $allele_string = $split[-1] if $split[-1] =~ /[ACGTN-]+\/[ACGTN-]+/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708 $allele_string ||= 'N/'.($data[6] =~ /intergenic/ ? 'N' : $data[2]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 my $vf = Bio::EnsEMBL::Variation::VariationFeature->new_fast({
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711 start => $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712 end => $end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713 allele_string => $allele_string,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714 strand => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715 map_weight => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716 adaptor => $config->{vfa},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717 chr => $chr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718 variation_name => $data[0],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 return [$vf];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 # converts to VCF format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 sub convert_to_vcf {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729 my $vf = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731 # look for imbalance in the allele string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732 my %allele_lengths;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733 my @alleles = split /\//, $vf->allele_string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735 foreach my $allele(@alleles) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736 $allele =~ s/\-//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737 $allele_lengths{length($allele)} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740 # in/del/unbalanced
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741 if(scalar keys %allele_lengths > 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743 # we need the ref base before the variation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744 # default to N in case we can't get it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745 my $prev_base = 'N';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747 unless(defined($config->{cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748 my $slice = $vf->slice->sub_Slice($vf->start - 1, $vf->start - 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749 $prev_base = $slice->seq if defined($slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752 for my $i(0..$#alleles) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753 $alleles[$i] =~ s/\-//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754 $alleles[$i] = $prev_base.$alleles[$i];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757 return [
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 $vf->{chr} || $vf->seq_region_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759 $vf->start - 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 $vf->variation_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761 shift @alleles,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762 (join ",", @alleles),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763 '.', '.', '.'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764 ];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768 # balanced sub
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770 return [
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 $vf->{chr} || $vf->seq_region_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772 $vf->start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 $vf->variation_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 shift @alleles,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775 (join ",", @alleles),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776 '.', '.', '.'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777 ];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782 # tries to map a VF to the LRG coordinate system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 sub add_lrg_mappings {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785 my $vfs = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787 my @new_vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 foreach my $vf(@$vfs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791 # add the unmapped VF to the array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 push @new_vfs, $vf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 # make sure the VF has an attached slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795 $vf->{slice} ||= get_slice($config, $vf->{chr});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796 next unless defined($vf->{slice});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798 # transform LRG <-> chromosome
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799 my $new_vf = $vf->transform($vf->{slice}->coord_system->name eq 'lrg' ? 'chromosome' : 'lrg');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801 # add it to the array if transformation worked
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802 if(defined($new_vf)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 # update new VF's chr entry
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805 $new_vf->{chr} = $new_vf->seq_region_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806 push @new_vfs, $new_vf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810 return \@new_vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814 # wrapper for whole_genome_fetch and vf_to_consequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815 # takes config and a listref of VFs, returns listref of line hashes for printing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816 sub get_all_consequences {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
817 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818 my $listref = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820 if ($config->{extra}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821 eval "use Plugin qw($config);"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824 # initialize caches
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825 $config->{$_.'_cache'} ||= {} for qw(tr rf slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827 # build hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828 my %vf_hash;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829 push @{$vf_hash{$_->{chr}}{int($_->{start} / $config->{chunk_size})}{$_->{start}}}, $_ for grep {!defined($_->{non_variant})} @$listref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831 my @non_variant = grep {defined($_->{non_variant})} @$listref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
832 debug("Skipping ".(scalar @non_variant)." non-variant loci\n") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
833
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
834 # get regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
835 my $regions = &regions_from_hash($config, \%vf_hash);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
836 my $trim_regions = $regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838 # get trimmed regions - allows us to discard out-of-range transcripts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
839 # when using cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
840 #if(defined($config->{cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
841 # my $tmp = $config->{cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842 # delete $config->{cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
843 # $trim_regions = regions_from_hash($config, \%vf_hash);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
844 # $config->{cache} = $tmp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
845 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
846
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
847 # prune caches
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
848 prune_cache($config, $config->{tr_cache}, $regions, $config->{loaded_tr});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
849 prune_cache($config, $config->{rf_cache}, $regions, $config->{loaded_rf});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
850
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
851 # get chr list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
852 my %chrs = map {$_->{chr} => 1} @$listref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
853
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854 my $fetched_tr_count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855 $fetched_tr_count = fetch_transcripts($config, $regions, $trim_regions)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856 unless defined($config->{no_consequences});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
857
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
858 my $fetched_rf_count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859 $fetched_rf_count = fetch_regfeats($config, $regions, $trim_regions)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860 if defined($config->{regulatory})
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861 && !defined($config->{no_consequences});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863 # check we can use MIME::Base64
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864 if(defined($config->{fork})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865 eval q{ use MIME::Base64; };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
866
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
867 if($@) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
868 debug("WARNING: Unable to load MIME::Base64, forking disabled") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
869 delete $config->{fork};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
870 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
871 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
872
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
873 my (@temp_array, @return, @pids);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
874 if(defined($config->{fork})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
875 my $size = scalar @$listref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
876
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
877 while(my $tmp_vf = shift @$listref) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
878 push @temp_array, $tmp_vf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
879
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
880 # fork
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
881 if(scalar @temp_array >= ($size / $config->{fork}) || scalar @$listref == 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
882
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
883 my $pid = fork;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
884
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
885 if(!defined($pid)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
886 debug("WARNING: Failed to fork - will attempt to continue without forking") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
887 push @temp_array, @$listref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
888 push @return, @{vf_list_to_cons($config, \@temp_array, $regions)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
889 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
890 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
891 elsif($pid) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
892 push @pids, $pid;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
893 @temp_array = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
894 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
895 elsif($pid == 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
896
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
897 $config->{forked} = $$;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
898 $config->{quiet} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
899
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
900 # redirect STDERR to PARENT so we can catch errors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
901 *STDERR = *PARENT;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
902
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
903 my $cons = vf_list_to_cons($config, \@temp_array, $regions);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
904
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
905 # what we're doing here is sending a serialised hash of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
906 # results through to the parent process through the socket.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
907 # This is then thawed by the parent process.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
908 # $$, or the PID, is added so that the input can be sorted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
909 # back into the correct order for output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
910 print PARENT $$." ".encode_base64(freeze($_), "\t")."\n" for @$cons;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
911
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
912 # some plugins may cache stuff, check for this and try and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
913 # reconstitute it into parent's plugin cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
914 foreach my $plugin(@{$config->{plugins}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
915
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
916 next unless defined($plugin->{has_cache});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
917
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
918 # delete unnecessary stuff and stuff that can't be serialised
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
919 delete $plugin->{$_} for qw(config feature_types variant_feature_types version feature_types_wanted variant_feature_types_wanted params);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
920 print PARENT $$." PLUGIN ".ref($plugin)." ".encode_base64(freeze($plugin), "\t")."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
921 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
922
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
923 # we need to tell the parent this child is finished
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
924 # otherwise it keeps listening
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
925 print PARENT "DONE $$\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
926 close PARENT;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
927
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
928 exit(0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
929 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
930 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
931 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
932
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
933 debug("Calculating consequences") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
934
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
935 my $fh = $config->{out_file_handle};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
936 my $done_processes = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
937 my $done_vars = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
938 my $total_size = $size;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
939 my $pruned_count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
940
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
941 # create a hash to store the returned data by PID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
942 # this means we can sort it correctly on output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
943 my %by_pid;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
944
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
945 # read child input
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
946 while(<CHILD>) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
947
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
948 # child finished
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
949 if(/^DONE/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
950 $done_processes++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
951 last if $done_processes == scalar @pids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
952 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
953
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
954 # variant finished / progress indicator
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
955 elsif(/^BUMP/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
956 progress($config, ++$done_vars, $total_size);# if $pruned_count == scalar @pids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
957 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
958
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
959 # output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
960 elsif(/^\-?\d+ /) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
961
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
962 # plugin
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
963 if(/^\-?\d+ PLUGIN/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
964
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
965 m/^(\-?\d+) PLUGIN (\w+) /;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
966 my ($pid, $plugin) = ($1, $2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
967
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
968 # remove the PID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
969 s/^\-?\d+ PLUGIN \w+ //;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
970 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
971
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
972 my $tmp = thaw(decode_base64($_));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
973
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
974 next unless defined($plugin);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
975
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
976 # copy data to parent plugin
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
977 my ($parent_plugin) = grep {ref($_) eq $plugin} @{$config->{plugins}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
978
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
979 next unless defined($parent_plugin);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
980
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
981 merge_hashes($parent_plugin, $tmp);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
982 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
983
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
984 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
985 # grab the PID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
986 m/^(\-?\d+)\s/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
987 my $pid = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
988 die "ERROR: Could not parse forked PID from line $_" unless defined($pid);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
989
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
990 # remove the PID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
991 s/^\-?\d+\s//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
992 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
993
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
994 # decode and thaw "output" from forked process
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
995 push @{$by_pid{$pid}}, thaw(decode_base64($_));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
996 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
997 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
998
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
999 elsif(/^PRUNED/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1000 s/PRUNED //g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1001 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1002 $pruned_count++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1003 $total_size += $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1004 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1005
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1006 elsif(/^DEBUG/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1007 print STDERR;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1008 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1009
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1010 # something's wrong
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1011 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1012 # kill the other pids
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1013 kill(15, $_) for @pids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1014 die("\nERROR: Forked process failed\n$_\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1015 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1016 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1017
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1018 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1019
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1020 debug("Writing output") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1021
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1022 waitpid($_, 0) for @pids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1023
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1024 # add the sorted data to the return array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1025 push @return, @{$by_pid{$_} || []} for @pids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1026 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1027
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1028 # no forking
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1029 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1030 push @return, @{vf_list_to_cons($config, $listref, $regions)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1031 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1032
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1033 if(defined($config->{debug})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1034 eval q{use Devel::Size qw(total_size)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1035 my $mem = memory();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1036 my $tot;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1037 $tot += $_ for @$mem;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1038
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1039 if($tot > 1000000) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1040 $tot = sprintf("%.2fGB", $tot / (1024 * 1024));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1041 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1042
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1043 elsif($tot > 1000) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1044 $tot = sprintf("%.2fMB", $tot / 1024);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1045 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1046
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1047 my $mem_diff = mem_diff($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1048 debug(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1049 "LINES ", $config->{line_number},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1050 "\tMEMORY $tot ", (join " ", @$mem),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1051 "\tDIFF ", (join " ", @$mem_diff),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1052 "\tCACHE ", total_size($config->{tr_cache}).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1053 "\tRF ", total_size($config->{rf_cache}),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1054 "\tVF ", total_size(\%vf_hash),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1055 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1056 #exit(0) if grep {$_ < 0} @$mem_diff;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1057 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1058
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1059 return \@return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1060 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1061
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1062 sub vf_list_to_cons {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1063 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1064 my $listref = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1065 my $regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1066
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1067 # get non-variants
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1068 my @non_variants = grep {$_->{non_variant}} @$listref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1069
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1070 my %vf_hash = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1071 push @{$vf_hash{$_->{chr}}{int($_->{start} / $config->{chunk_size})}{$_->{start}}}, $_ for grep {!defined($_->{non_variant})} @$listref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1072
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1073 # check existing VFs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1074 &check_existing_hash($config, \%vf_hash) if defined($config->{check_existing});# && scalar @$listref > 10;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1075
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1076 # get overlapping SVs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1077 &check_svs_hash($config, \%vf_hash) if defined($config->{check_svs});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1078
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1079 # if we are forked, we can trim off some stuff
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1080 if(defined($config->{forked})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1081 my $tmp = $config->{cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1082 delete $config->{cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1083 $regions = regions_from_hash($config, \%vf_hash);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1084 $config->{cache} = $tmp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1085
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1086 # prune caches
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1087 my $new_count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1088
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1089 $new_count += prune_cache($config, $config->{tr_cache}, $regions, $config->{loaded_tr});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1090 $new_count += prune_cache($config, $config->{rf_cache}, $regions, $config->{loaded_rf});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1091
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1092 print PARENT "PRUNED $new_count\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1093 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1094
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1095 my @return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1096
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1097 foreach my $chr(sort {($a !~ /^\d+$/ || $b !~ /^\d+/) ? $a cmp $b : $a <=> $b} keys %vf_hash) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1098 my $finished_vfs = whole_genome_fetch($config, $chr, \%vf_hash);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1099
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1100 # non-variants?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1101 if(scalar @non_variants) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1102 push @$finished_vfs, grep {$_->{chr} eq $chr} @non_variants;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1104 # need to re-sort
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1105 @$finished_vfs = sort {$a->{start} <=> $b->{start} || $a->{end} <=> $b->{end}} @$finished_vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1106 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1108 debug("Calculating consequences") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1109
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1110 my $vf_count = scalar @$finished_vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1111 my $vf_counter = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1113 while(my $vf = shift @$finished_vfs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1114 progress($config, $vf_counter++, $vf_count) unless $vf_count == 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1116 my $filter_ok = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1118 # filtered output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1119 if(defined($config->{filter})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1120 $filter_ok = filter_by_consequence($config, $vf);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1121 $config->{filter_count} += $filter_ok;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1122 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1123
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1124 # skip filtered lines
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1125 next unless $filter_ok;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1127 # original output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1128 if(defined($config->{original})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1129 push @return, $vf->{_line};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1130 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1132 # GVF output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1133 elsif(defined($config->{gvf})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1134 $vf->source("User");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1136 $config->{gvf_id} ||= 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1137
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1138 # get custom annotation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1139 my $custom_annotation = defined($config->{custom}) ? get_custom_annotation($config, $vf) : {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1140 $custom_annotation->{ID} = $config->{gvf_id}++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1141
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1143 my $tmp = $vf->to_gvf(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1144 include_consequences => defined($config->{no_consequences}) ? 0 : 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1145 extra_attrs => $custom_annotation,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1146 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1147 push @return, \$tmp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1148 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1150 # VCF output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1151 elsif(defined($config->{vcf})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1153 # convert to VCF, otherwise get line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1154 my $line = $config->{format} eq 'vcf' ? [split /\s+/, $vf->{_line}] : convert_to_vcf($config, $vf);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1155
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1156 if(!defined($line->[7]) || $line->[7] eq '.') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1157 $line->[7] = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1158 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1160 # get all the lines the normal way
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1161 # and process them into VCF-compatible string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1162 my $string = 'CSQ=';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1164 foreach my $line(@{vf_to_consequences($config, $vf)}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1165
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1166 # use the field list (can be user-defined by setting --fields)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1167 for my $col(@{$config->{fields}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1169 # skip fields already represented in the VCF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1170 next if $col eq 'Uploaded_variation' or $col eq 'Location' or $col eq 'Extra';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1172 # search for data in main line hash as well as extra field
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1173 my $data = defined $line->{$col} ? $line->{$col} : $line->{Extra}->{$col};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1174
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1175 # "-" means null for everything except the Allele field (confusing...)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1176 $data = undef if defined($data) and $data eq '-' and $col ne 'Allele';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1177 $data =~ s/\,/\&/g if defined $data;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1178 $string .= defined($data) ? $data : '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1179 $string .= '|';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1180 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1181
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1182 $string =~ s/\|$//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1183 $string .= ',';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1184 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1186 $string =~ s/\,$//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1188 if(!defined($config->{no_consequences}) && $string ne 'CSQ=') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1189 $line->[7] .= ($line->[7] ? ';' : '').$string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1190 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1192 # get custom annotation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1193 if(defined($config->{custom}) && scalar @{$config->{custom}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1194 my $custom_annotation = get_custom_annotation($config, $vf);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1195 foreach my $key(keys %{$custom_annotation}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1196 $line->[7] .= ($line->[7] ? ';' : '').$key.'='.$custom_annotation->{$key};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1197 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1198 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1200 my $tmp = join "\t", @$line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1201 push @return, \$tmp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1202 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1204 # no consequence output from vep input
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1205 elsif(defined($config->{no_consequences}) && $config->{format} eq 'vep') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1207 my $line = [split /\s+/, $vf->{_line}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1208
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1209 if($line->[13] eq '-') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1210 $line->[13] = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1211 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1212
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1213 # get custom annotation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1214 if(defined($config->{custom})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1215 my $custom_annotation = get_custom_annotation($config, $vf);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1216 foreach my $key(keys %{$custom_annotation}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1217 $line->[13] .= ($line->[13] ? ';' : '').$key.'='.$custom_annotation->{$key};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1218 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1219 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1221 my $tmp = join "\t", @$line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1222 push @return, \$tmp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1223 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1224
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1225 # normal output
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1226 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1227 push @return, @{vf_to_consequences($config, $vf)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1228 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1229
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1230 print PARENT "BUMP\n" if defined($config->{forked});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1231 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1232
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1233 end_progress($config) unless scalar @$listref == 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1234 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1235
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1236 return \@return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1237 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1238
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1239 # takes a variation feature and returns ready to print consequence information
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1240 sub vf_to_consequences {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1241 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1242 my $vf = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1243
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1244 # use a different method for SVs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1245 return svf_to_consequences($config, $vf) if $vf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1247 my @return = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1248
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1249 # method name for consequence terms
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1250 my $term_method = $config->{terms}.'_term';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1252 # find any co-located existing VFs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1253 $vf->{existing} ||= find_existing($config, $vf) if defined $config->{check_existing};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1254
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1255 # skip based on frequency checks?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1256 if(defined($config->{check_frequency}) && defined($vf->{existing}) && $vf->{existing} ne '-' && defined($config->{va})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1257 return [] unless grep {$_} map {check_frequencies($config, $_)} reverse split(/\,/, $vf->{existing});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1258 $vf->{freqs} = $config->{filtered_freqs};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1259 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1260
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1261 # force empty hash into object's transcript_variations if undefined from whole_genome_fetch
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1262 # this will stop the API trying to go off and fill it again
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1263 $vf->{transcript_variations} ||= {} if defined $config->{whole_genome};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1264
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1265 # regulatory stuff
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1266 if(!defined $config->{coding_only} && defined $config->{regulatory}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1268 for my $rfv (@{ $vf->get_all_RegulatoryFeatureVariations }) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1269
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1270 my $rf = $rfv->regulatory_feature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1271
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1272 my $base_line = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1273 Feature_type => 'RegulatoryFeature',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1274 Feature => $rf->stable_id,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1275 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1276
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1277 if(defined($config->{cell_type}) && scalar(@{$config->{cell_type}})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1278 $base_line->{Extra}->{CELL_TYPE} = join ",",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1279 map {$_.':'.$rf->{cell_types}->{$_}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1280 grep {$rf->{cell_types}->{$_}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1281 @{$config->{cell_type}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1282
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1283 $base_line->{Extra}->{CELL_TYPE} =~ s/\s+/\_/g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1284 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1285
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1286 # this currently always returns 'RegulatoryFeature', so we ignore it for now
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1287 #$base_line->{Extra}->{REG_FEAT_TYPE} = $rf->feature_type->name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1288
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1289 for my $rfva (@{ $rfv->get_all_alternate_RegulatoryFeatureVariationAlleles }) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1290
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1291 my $line = init_line($config, $vf, $base_line);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1292
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1293 $line->{Allele} = $rfva->variation_feature_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1294 $line->{Consequence} = join ',',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1295 map { $_->$term_method || $_->SO_term }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1296 @{ $rfva->get_all_OverlapConsequences };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1298 $line = run_plugins($rfva, $line, $config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1299
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1300 push @return, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1301 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1302 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1303
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1304 for my $mfv (@{ $vf->get_all_MotifFeatureVariations }) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1305
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1306 my $mf = $mfv->motif_feature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1307
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1308 # check that the motif has a binding matrix, if not there's not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1309 # much we can do so don't return anything
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1310
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1311 next unless defined $mf->binding_matrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1312
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1313 my $matrix = $mf->binding_matrix->description.' '.$mf->display_label;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1314 $matrix =~ s/\s+/\_/g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1315
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1316 my $base_line = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1317 Feature_type => 'MotifFeature',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1318 Feature => $mf->binding_matrix->name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1319 Extra => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1320 MOTIF_NAME => $matrix,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1321 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1322 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1323
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1324 if(defined($config->{cell_type}) && scalar(@{$config->{cell_type}})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1325 $base_line->{Extra}->{CELL_TYPE} = join ",",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1326 map {$_.':'.$mf->{cell_types}->{$_}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1327 grep {$mf->{cell_types}->{$_}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1328 @{$config->{cell_type}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1329
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1330 $base_line->{Extra}->{CELL_TYPE} =~ s/\s+/\_/g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1331 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1333 for my $mfva (@{ $mfv->get_all_alternate_MotifFeatureVariationAlleles }) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1334
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1335 my $line = init_line($config, $vf, $base_line);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1336
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1337 $line->{Extra}->{MOTIF_POS} = $mfva->motif_start if defined $mfva->motif_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1338 $line->{Extra}->{HIGH_INF_POS} = ($mfva->in_informative_position ? 'Y' : 'N');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1339
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1340 my $delta = $mfva->motif_score_delta if $mfva->variation_feature_seq =~ /^[ACGT]+$/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1341
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1342 $line->{Extra}->{MOTIF_SCORE_CHANGE} = sprintf("%.3f", $delta) if defined $delta;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1343
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1344 $line->{Allele} = $mfva->variation_feature_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1345 $line->{Consequence} = join ',',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1346 map { $_->$term_method || $_->SO_term }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1347 @{ $mfva->get_all_OverlapConsequences };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1348
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1349 $line = run_plugins($mfva, $line, $config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1350
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1351 push @return, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1352 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1353 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1354 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1355
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1356 # get TVs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1357 my $tvs = $vf->get_all_TranscriptVariations;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1359 # only most severe
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1360 if(defined($config->{most_severe}) || defined($config->{summary})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1361
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1362 my $line = init_line($config, $vf);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1363
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1364 if(defined($config->{summary})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1365 $line->{Consequence} = join ",", @{$vf->consequence_type($config->{terms}) || $vf->consequence_type};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1366 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1367 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1368 $line->{Consequence} = $vf->display_consequence($config->{terms}) || $vf->display_consequence;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1369 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1371 push @return, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1372 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1373
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1374 # pass a true argument to get_IntergenicVariation to stop it doing a reference allele check
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1375 # (to stay consistent with the rest of the VEP)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1376 elsif ((my $iv = $vf->get_IntergenicVariation(1)) && !defined($config->{no_intergenic})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1377
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1378 for my $iva (@{ $iv->get_all_alternate_IntergenicVariationAlleles }) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1379
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1380 my $line = init_line($config, $vf);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1381
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1382 $line->{Allele} = $iva->variation_feature_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1383
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1384 my $cons = $iva->get_all_OverlapConsequences->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1385
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1386 $line->{Consequence} = $cons->$term_method || $cons->SO_term;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1387
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1388 $line = run_plugins($iva, $line, $config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1389
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1390 push @return, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1391 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1392 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1393
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1394 # user wants only one conseqeunce per gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1395 elsif(defined($config->{per_gene})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1397 # sort the TVA objects into a hash by gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1398 my %by_gene;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1399
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1400 foreach my $tv(@$tvs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1401 next if(defined $config->{coding_only} && !($tv->affects_cds));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1402
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1403 my $gene = $tv->transcript->{_gene_stable_id} || $config->{ga}->fetch_by_transcript_stable_id($tv->transcript->stable_id)->stable_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1404
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1405 push @{$by_gene{$gene}}, @{$tv->get_all_alternate_TranscriptVariationAlleles};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1406 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1407
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1408 foreach my $gene(keys %by_gene) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1409 my ($lowest, $lowest_tva);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1410
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1411 # at the moment this means the one that comes out last will be picked
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1412 # if there is more than one TVA with the same rank of consequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1413 foreach my $tva(@{$by_gene{$gene}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1414 foreach my $oc(@{$tva->get_all_OverlapConsequences}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1415 if(!defined($lowest) || $oc->rank < $lowest) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1416 $lowest = $oc->rank;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1417 $lowest_tva = $tva;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1418 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1419 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1420 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1421
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1422 push @return, tva_to_line($config, $lowest_tva);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1423 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1424 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1425
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1426 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1427 foreach my $tv(@$tvs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1428 next if(defined $config->{coding_only} && !($tv->affects_cds));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1429
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1430 push @return, map {tva_to_line($config, $_)} @{$tv->get_all_alternate_TranscriptVariationAlleles};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1431
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1432 undef $tv->{$_} for keys %$tv;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1433 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1434 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1435
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1436 return \@return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1437 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1438
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1439 # get consequences for a structural variation feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1440 sub svf_to_consequences {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1441 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1442 my $svf = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1443
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1444 my @return = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1445
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1446 my $term_method = $config->{terms}.'_term';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1447
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1448 if(defined $config->{whole_genome}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1449 $svf->{transcript_structural_variations} ||= [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1450 $svf->{regulation_structural_variations}->{$_} ||= [] for @REG_FEAT_TYPES;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1451 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1452
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1453 if ((my $iv = $svf->get_IntergenicStructuralVariation(1)) && !defined($config->{no_intergenic})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1454
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1455 for my $iva (@{ $iv->get_all_alternate_IntergenicStructuralVariationAlleles }) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1456
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1457 my $line = init_line($config, $svf);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1458
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1459 $line->{Allele} = '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1460
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1461 my $cons = $iva->get_all_OverlapConsequences->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1462
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1463 $line->{Consequence} = $cons->$term_method || $cons->SO_term;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1464
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1465 $line = run_plugins($iva, $line, $config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1466
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1467 push @return, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1468 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1469 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1470
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1471 foreach my $svo(@{$svf->get_all_StructuralVariationOverlaps}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1472
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1473 next if $svo->isa('Bio::EnsEMBL::Variation::IntergenicStructuralVariation');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1474
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1475 my $feature = $svo->feature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1476
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1477 # get feature type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1478 my $feature_type = (split '::', ref($feature))[-1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1480 my $base_line = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1481 Feature_type => $feature_type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1482 Feature => $feature->stable_id,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1483 Allele => $svf->class_SO_term,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1484 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1485
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1486 if($svo->isa('Bio::EnsEMBL::Variation::BaseTranscriptVariation')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1487 $base_line->{cDNA_position} = format_coords($svo->cdna_start, $svo->cdna_end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1488 $base_line->{CDS_position} = format_coords($svo->cds_start, $svo->cds_end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1489 $base_line->{Protein_position} = format_coords($svo->translation_start, $svo->translation_end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1490 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1491
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1492 foreach my $svoa(@{$svo->get_all_StructuralVariationOverlapAlleles}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1493 my $line = init_line($config, $svf, $base_line);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1494
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1495 $line->{Consequence} = join ",",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1496 #map {s/feature/$feature_type/e; $_}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1497 map {$_->$term_method}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1498 sort {$a->rank <=> $b->rank}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1499 @{$svoa->get_all_OverlapConsequences};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1500
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1501 # work out overlap amounts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1502 my $overlap_start = (sort {$a <=> $b} ($svf->start, $feature->start))[-1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1503 my $overlap_end = (sort {$a <=> $b} ($svf->end, $feature->end))[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1504 my $overlap_length = ($overlap_end - $overlap_start) + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1505 my $overlap_pc = 100 * ($overlap_length / (($feature->end - $feature->start) + 1));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1507 $line->{Extra}->{OverlapBP} = $overlap_length if $overlap_length > 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1508 $line->{Extra}->{OverlapPC} = sprintf("%.2f", $overlap_pc) if $overlap_pc > 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1509
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1510 add_extra_fields($config, $line, $svoa);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1511
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1512 push @return, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1513 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1514 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1515
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1516 return \@return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1517 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1518
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1519 # run all of the configured plugins on a VariationFeatureOverlapAllele instance
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1520 # and store any results in the provided line hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1521 sub run_plugins {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1522
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1523 my ($bvfoa, $line_hash, $config) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1524
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1525 my $skip_line = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1526
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1527 for my $plugin (@{ $config->{plugins} || [] }) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1528
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1529 # check that this plugin is interested in this type of variation feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1530
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1531 if ($plugin->check_variant_feature_type(ref $bvfoa->base_variation_feature)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1532
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1533 # check that this plugin is interested in this type of feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1534
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1535 if ($plugin->check_feature_type(ref $bvfoa->feature || 'Intergenic')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1537 eval {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1538 my $plugin_results = $plugin->run($bvfoa, $line_hash);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1539
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1540 if (defined $plugin_results) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1541 if (ref $plugin_results eq 'HASH') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1542 for my $key (keys %$plugin_results) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1543 $line_hash->{Extra}->{$key} = $plugin_results->{$key};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1544 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1545 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1546 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1547 warn "Plugin '".(ref $plugin)."' did not return a hashref, output ignored!\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1548 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1549 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1550 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1551 # if a plugin returns undef, that means it want to filter out this line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1552 $skip_line = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1553 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1554 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1555 if ($@) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1556 warn "Plugin '".(ref $plugin)."' went wrong: $@";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1557 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1558
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1559 # there's no point running any other plugins if we're filtering this line,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1560 # because the first plugin to skip the line wins, so we might as well last
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1561 # out of the loop now and avoid any unnecessary computation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1562
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1563 last if $skip_line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1564 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1565 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1566 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1567
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1568 return $skip_line ? undef : $line_hash;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1569 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1570
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1571 # turn a TranscriptVariationAllele into a line hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1572 sub tva_to_line {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1573 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1574 my $tva = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1575
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1576 my $tv = $tva->transcript_variation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1577 my $t = $tv->transcript;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1578
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1579 # method name for consequence terms
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1580 my $term_method = $config->{terms}.'_term';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1581
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1582 my $base_line = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1583 Feature_type => 'Transcript',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1584 Feature => (defined $t ? $t->stable_id : undef),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1585 cDNA_position => format_coords($tv->cdna_start, $tv->cdna_end),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1586 CDS_position => format_coords($tv->cds_start, $tv->cds_end),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1587 Protein_position => format_coords($tv->translation_start, $tv->translation_end),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1588 Allele => $tva->variation_feature_seq,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1589 Amino_acids => $tva->pep_allele_string,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1590 Codons => $tva->display_codon_allele_string,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1591 Consequence => join ",", map {$_->$term_method || $_->SO_term} sort {$a->rank <=> $b->rank} @{$tva->get_all_OverlapConsequences},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1592 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1593
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1594 my $line = init_line($config, $tva->variation_feature, $base_line);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1595
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1596 # HGVS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1597 if(defined $config->{hgvs}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1598 my $hgvs_t = $tva->hgvs_transcript;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1599 my $hgvs_p = $tva->hgvs_protein;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1601 $line->{Extra}->{HGVSc} = $hgvs_t if $hgvs_t;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1602 $line->{Extra}->{HGVSp} = $hgvs_p if $hgvs_p;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1603 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1604
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1605 foreach my $tool (qw(SIFT PolyPhen)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1606 my $lc_tool = lc($tool);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1607
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1608 if (my $opt = $config->{$lc_tool}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1609 my $want_pred = $opt =~ /^p/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1610 my $want_score = $opt =~ /^s/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1611 my $want_both = $opt =~ /^b/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1612
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1613 if ($want_both) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1614 $want_pred = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1615 $want_score = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1616 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1617
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1618 next unless $want_pred || $want_score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1619
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1620 my $pred_meth = $lc_tool.'_prediction';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1621 my $score_meth = $lc_tool.'_score';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1622
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1623 my $pred = $tva->$pred_meth;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1624
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1625 if($pred) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1626
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1627 if ($want_pred) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1628 $pred =~ s/\s+/\_/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1629 $line->{Extra}->{$tool} = $pred;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1630 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1631
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1632 if ($want_score) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1633 my $score = $tva->$score_meth;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1634
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1635 if(defined $score) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1636 if($want_pred) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1637 $line->{Extra}->{$tool} .= "($score)";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1638 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1639 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1640 $line->{Extra}->{$tool} = $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1641 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1642 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1643 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1644 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1645 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1646 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1647
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1648 $line = add_extra_fields($config, $line, $tva);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1649
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1650 return $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1651 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1652
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1653 sub add_extra_fields {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1654 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1655 my $line = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1656 my $bvfoa = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1657
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1658 # overlapping SVs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1659 if(defined $config->{check_svs} && defined $bvfoa->base_variation_feature->{overlapping_svs}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1660 $line->{Extra}->{SV} = $bvfoa->base_variation_feature->{overlapping_svs};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1661 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1662
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1663 # add transcript-specific fields
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1664 $line = add_extra_fields_transcript($config, $line, $bvfoa) if $bvfoa->isa('Bio::EnsEMBL::Variation::BaseTranscriptVariationAllele');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1665
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1666 # run plugins
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1667 $line = run_plugins($bvfoa, $line, $config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1668
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1669 return $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1670 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1671
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1672 sub add_extra_fields_transcript {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1673 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1674 my $line = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1675 my $tva = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1676
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1677 my $tv = $tva->base_variation_feature_overlap;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1678 my $tr = $tva->transcript;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1679
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1680 # get gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1681 my $gene;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1682
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1683 $line->{Gene} = $tr->{_gene_stable_id};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1684
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1685 if(!defined($line->{Gene})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1686 $gene = $config->{ga}->fetch_by_transcript_stable_id($tr->stable_id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1687 $line->{Gene} = $gene ? $gene->stable_id : '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1688 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1689
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1690 # exon/intron numbers
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1691 if ($config->{numbers}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1692 $line->{Extra}->{EXON} = $tv->exon_number if defined $tv->exon_number;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1693 $line->{Extra}->{INTRON} = $tv->intron_number if defined $tv->intron_number;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1694 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1695
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1696 if ($config->{domains}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1697 my $feats = $tv->get_overlapping_ProteinFeatures;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1698
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1699 my @strings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1700
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1701 for my $feat (@$feats) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1702 my $label = $feat->analysis->display_label.':'.$feat->hseqname;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1703
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1704 # replace any special characters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1705 $label =~ s/[\s;=]/_/g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1706
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1707 push @strings, $label;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1708 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1709
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1710 $line->{Extra}->{DOMAINS} = join ',', @strings if @strings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1711 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1712
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1713 # distance to transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1714 if($line->{Consequence} =~ /(up|down)stream/i) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1715 $line->{Extra}->{DISTANCE} = $tv->distance_to_transcript;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1716 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1717
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1718 # HGNC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1719 if(defined $config->{hgnc}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1720 my $hgnc;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1721 $hgnc = $tr->{_gene_hgnc};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1722
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1723 if(!defined($hgnc)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1724 if(!defined($gene)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1725 $gene = $config->{ga}->fetch_by_transcript_stable_id($tr->stable_id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1726 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1727
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1728 my @entries = grep {$_->database eq 'HGNC'} @{$gene->get_all_DBEntries()};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1729 if(scalar @entries) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1730 $hgnc = $entries[0]->display_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1731 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1732 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1733
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1734 $hgnc = undef if defined($hgnc) && $hgnc eq '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1735
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1736 $line->{Extra}->{HGNC} = $hgnc if defined($hgnc);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1737 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1738
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1739 # CCDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1740 if(defined($config->{ccds})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1741 my $ccds = $tr->{_ccds};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1742
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1743 if(!defined($ccds)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1744 my @entries = grep {$_->database eq 'CCDS'} @{$tr->get_all_DBEntries};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1745 $ccds = $entries[0]->display_id if scalar @entries;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1746 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1747
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1748 $ccds = undef if defined($ccds) && $ccds eq '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1749
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1750 $line->{Extra}->{CCDS} = $ccds if defined($ccds);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1751 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1752
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1753 # refseq xref
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1754 if(defined($config->{xref_refseq})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1755 my $refseq = $tr->{_refseq};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1756
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1757 if(!defined($refseq)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1758 my @entries = grep {$_->database eq 'RefSeq_mRNA'} @{$tr->get_all_DBEntries};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1759 if(scalar @entries) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1760 $refseq = join ",", map {$_->display_id."-".$_->database} @entries;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1761 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1762 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1763
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1764 $refseq = undef if defined($refseq) && $refseq eq '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1765
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1766 $line->{Extra}->{RefSeq} = $refseq if defined($refseq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1767 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1768
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1769 # protein ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1770 if(defined $config->{protein}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1771 my $protein = $tr->{_protein};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1772
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1773 if(!defined($protein)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1774 $protein = $tr->translation->stable_id if defined($tr->translation);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1775 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1776
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1777 $protein = undef if defined($protein) && $protein eq '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1778
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1779 $line->{Extra}->{ENSP} = $protein if defined($protein);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1780 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1781
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1782 # canonical transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1783 if(defined $config->{canonical}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1784 $line->{Extra}->{CANONICAL} = 'YES' if $tr->is_canonical;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1785 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1786
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1787 return $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1788 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1789
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1790 # initialize a line hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1791 sub init_line {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1792 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1793 my $vf = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1794 my $base_line = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1795
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1796 my $line = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1797 Uploaded_variation => $vf->variation_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1798 Location => ($vf->{chr} || $vf->seq_region_name).':'.format_coords($vf->start, $vf->end),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1799 Existing_variation => $vf->{existing},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1800 Extra => {},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1801 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1802
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1803 # add custom info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1804 if(defined($config->{custom}) && scalar @{$config->{custom}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1805 # merge the custom hash with the extra hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1806 my $custom = get_custom_annotation($config, $vf);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1807
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1808 for my $key (keys %$custom) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1809 $line->{Extra}->{$key} = $custom->{$key};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1810 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1811 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1812
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1813 # individual?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1814 $line->{Extra}->{IND} = $vf->{individual} if defined($vf->{individual});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1815
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1816 # frequencies?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1817 $line->{Extra}->{FREQS} = join ",", @{$vf->{freqs}} if defined($vf->{freqs});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1818
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1819 # gmaf?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1820 $line->{Extra}->{GMAF} = $vf->{gmaf} if defined($config->{gmaf}) && defined($vf->{gmaf});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1821
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1822 # copy entries from base_line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1823 if(defined($base_line)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1824 $line->{$_} = $base_line->{$_} for keys %$base_line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1825 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1826
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1827 return $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1828 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1829
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1830
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1831 # get custom annotation for a single VF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1832 sub get_custom_annotation {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1833 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1834 my $vf = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1835 my $cache = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1836
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1837 return $vf->{custom} if defined($vf->{custom});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1838
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1839 my $annotation = {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1840
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1841 my $chr = $vf->{chr};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1842
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1843 if(!defined($cache)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1844 # spoof regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1845 my $regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1846 $regions->{$chr} = [$vf->{start}.'-'.$vf->{end}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1847 $cache = cache_custom_annotation($config, $regions, $chr);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1848 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1849
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1850 foreach my $custom(@{$config->{custom}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1851 next unless defined($cache->{$chr}->{$custom->{name}});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1852
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1853 # exact type must match coords of variant exactly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1854 if($custom->{type} eq 'exact') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1855 foreach my $feature(values %{$cache->{$chr}->{$custom->{name}}->{$vf->{start}}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1856
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1857 next unless
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1858 $feature->{chr} eq $chr &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1859 $feature->{start} eq $vf->{start} &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1860 $feature->{end} eq $vf->{end};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1861
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1862 $annotation->{$custom->{name}} .= $feature->{name}.',';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1863 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1864 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1865
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1866 # overlap type only needs to overlap, but we need to search the whole range
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1867 elsif($custom->{type} eq 'overlap') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1868 foreach my $pos(keys %{$cache->{$chr}->{$custom->{name}}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1869 foreach my $feature(values %{$cache->{$chr}->{$custom->{name}}->{$pos}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1870
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1871 next unless
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1872 $feature->{chr} eq $chr &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1873 $feature->{end} >= $vf->{start} &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1874 $feature->{start} <= $vf->{end};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1875
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1876 $annotation->{$custom->{name}} .= $feature->{name}.',';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1877 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1878 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1879 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1880
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1881 # trim off trailing commas
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1882 $annotation->{$custom->{name}} =~ s/\,$//g if defined($annotation->{$custom->{name}});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1883 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1884
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1885 return $annotation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1886 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1887
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1888 # decides whether to print a VF based on user defined consequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1889 sub filter_by_consequence {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1890 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1891 my $vf = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1892 my $filters = $config->{filter};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1893
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1894 # find it if we only have "no"s
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1895 my $only_nos = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1896 $only_nos = 1 if (sort {$a <=> $b} values %$filters)[-1] == 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1897
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1898 my ($yes, $no) = (0, 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1899
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1900 # get all consequences across all term types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1901 my @types = ('SO', 'display');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1902
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1903 my @cons;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1904 push @cons, @{$vf->consequence_type($_)} for @types;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1905
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1906 # add regulatory consequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1907 if(defined($config->{regulatory})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1908 foreach my $term_type(@types) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1909 my $term_method = $term_type.'_term';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1910
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1911 for my $rfv (@{ $vf->get_all_RegulatoryFeatureVariations }) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1912 for my $rfva(@{$rfv->get_all_alternate_RegulatoryFeatureVariationAlleles}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1913 push @cons, map {$_->$term_method} @{ $rfva->get_all_OverlapConsequences };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1914 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1915 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1916 for my $mfv (@{ $vf->get_all_MotifFeatureVariations }) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1917 for my $mfva(@{$mfv->get_all_alternate_MotifFeatureVariationAlleles}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1918 push @cons, map {$_->$term_method} @{ $mfva->get_all_OverlapConsequences };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1919 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1920 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1921 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1922 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1923
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1924 foreach my $con(grep {defined($_) && defined($filters->{$_})} @cons) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1925 if($filters->{$con} == 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1926 $yes = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1927 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1928 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1929 $no = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1930 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1931 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1932
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1933 # check special case, coding
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1934 if(defined($filters->{coding})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1935 if(grep {$_->affects_cds} @{$vf->get_all_TranscriptVariations}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1936 if($filters->{coding} == 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1937 $yes = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1938 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1939 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1940 $no = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1941 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1942 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1943 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1944
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1945 my $ok = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1946 if($only_nos) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1947 $ok = 1 if !$no;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1948 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1949 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1950 $ok = 1 if $yes && !$no;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1951 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1952
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1953 return $ok;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1954 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1955
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1956
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1957 # takes VFs created from input, fixes and checks various things
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1958 sub validate_vf {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1959 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1960 my $vf = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1961
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1962 # user specified chr skip list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1963 return 0 if defined($config->{chr}) && !$config->{chr}->{$vf->{chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1964
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1965 # fix inputs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1966 $vf->{chr} =~ s/chr//ig unless $vf->{chr} =~ /^chromosome$/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1967 $vf->{chr} = 'MT' if $vf->{chr} eq 'M';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1968 $vf->{strand} ||= 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1969 $vf->{strand} = ($vf->{strand} =~ /\-/ ? "-1" : "1");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1970
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1971 # sanity checks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1972 unless($vf->{start} =~ /^\d+$/ && $vf->{end} =~ /^\d+$/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1973 warn("WARNING: Start ".$vf->{start}." or end ".$vf->{end}." coordinate invalid on line ".$config->{line_number}."\n") unless defined $config->{quiet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1974 return 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1975 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1976
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1977 # structural variation?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1978 return validate_svf($config, $vf) if $vf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1979
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1980 # uppercase allele string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1981 $vf->{allele_string} =~ tr/[a-z]/[A-Z]/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1982
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1983 unless($vf->{allele_string} =~ /([ACGT-]+\/*)+/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1984 warn("WARNING: Invalid allele string ".$vf->{allele_string}." on line ".$config->{line_number}." or possible parsing error\n") unless defined $config->{quiet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1985 return 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1986 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1987
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1988 # insertion should have start = end + 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1989 if($vf->{allele_string} =~ /^\-\// && $vf->{start} != $vf->{end} + 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1990 warn(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1991 "WARNING: Alleles look like an insertion (".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1992 $vf->{allele_string}.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1993 ") but coordinates are not start = end + 1 (START=".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1994 $vf->{start}.", END=".$vf->{end}.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1995 ") on line ".$config->{line_number}."\n"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1996 ) unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1997 return 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1998 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1999
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2000 # check length of reference matches seq length spanned
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2001 my @alleles = split /\//, $vf->{allele_string};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2002 my $ref_allele = shift @alleles;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2003 my $tmp_ref_allele = $ref_allele;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2004 $tmp_ref_allele =~ s/\-//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2005
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2006 #if(($vf->{end} - $vf->{start}) + 1 != length($tmp_ref_allele)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2007 # warn(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2008 # "WARNING: Length of reference allele (".$ref_allele.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2009 # " length ".length($tmp_ref_allele).") does not match co-ordinates ".$vf->{start}."-".$vf->{end}.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2010 # " on line ".$config->{line_number}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2011 # ) unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2012 # return 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2013 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2014
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2015 # flag as unbalanced
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2016 foreach my $allele(@alleles) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2017 $allele =~ s/\-//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2018 $vf->{indel} = 1 unless length($allele) == length($tmp_ref_allele);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2019 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2020
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2021 # check reference allele if requested
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2022 if(defined $config->{check_ref}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2023 my $ok = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2024 my $slice_ref_allele;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2025
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2026 # insertion, therefore no ref allele to check
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2027 if($ref_allele eq '-') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2028 $ok = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2029 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2030 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2031 my $slice_ref = $vf->{slice}->sub_Slice($vf->{start}, $vf->{end}, $vf->{strand});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2032
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2033 if(!defined($slice_ref)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2034 warn "WARNING: Could not fetch sub-slice from ".$vf->{start}."\-".$vf->{end}."\(".$vf->{strand}."\) on line ".$config->{line_number} unless defined $config->{quiet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2035 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2036
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2037 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2038 $slice_ref_allele = $slice_ref->seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2039 $ok = ($slice_ref_allele eq $ref_allele ? 1 : 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2040 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2041 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2042
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2043 if(!$ok) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2044 warn
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2045 "WARNING: Specified reference allele $ref_allele ",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2046 "does not match Ensembl reference allele",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2047 ($slice_ref_allele ? " $slice_ref_allele" : ""),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2048 " on line ".$config->{line_number} unless defined $config->{quiet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2049 return 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2050 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2051 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2052
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2053 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2054 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2055
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2056
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2057 # validate a structural variation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2058 sub validate_svf {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2059 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2060 my $svf = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2061
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2062 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2063 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2064
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2065
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2066 # takes a hash of VFs and fetches consequences by pre-fetching overlapping transcripts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2067 # from database and/or cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2068 sub whole_genome_fetch {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2069 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2070 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2071 my $vf_hash = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2072
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2073 my (%vf_done, @finished_vfs, %seen_rfs);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2074
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2075 if(defined($config->{offline}) && !-e $config->{dir}.'/'.$chr) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2076 debug("No cache found for chromsome $chr") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2077
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2078 foreach my $chunk(keys %{$vf_hash->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2079 foreach my $pos(keys %{$vf_hash->{$chr}{$chunk}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2080 push @finished_vfs, @{$vf_hash->{$chr}{$chunk}{$pos}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2081 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2082 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2083
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2084 return \@finished_vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2085 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2086
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2087 my $slice_cache = $config->{slice_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2088 build_slice_cache($config, $config->{tr_cache}) unless defined($slice_cache->{$chr});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2089
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2090 debug("Analyzing chromosome $chr") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2091
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2092 # custom annotations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2093 whole_genome_fetch_custom($config, $vf_hash, $chr) if defined($config->{custom});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2094
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2095 # split up normal variations from SVs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2096 my ($tmp_vf_hash, @svfs);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2097
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2098 foreach my $chunk(keys %{$vf_hash->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2099 foreach my $pos(keys %{$vf_hash->{$chr}{$chunk}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2100 foreach my $vf(@{$vf_hash->{$chr}{$chunk}{$pos}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2101 if($vf->isa('Bio::EnsEMBL::Variation::StructuralVariationFeature')) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2102 push @svfs, $vf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2103 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2104 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2105 push @{$tmp_vf_hash->{$chr}{$chunk}{$pos}}, $vf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2106 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2107 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2108 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2109 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2111 $vf_hash = $tmp_vf_hash;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2113 # transcript annotations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2114 whole_genome_fetch_transcript($config, $vf_hash, $chr)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2115 unless defined($config->{no_consequences});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2117 # regulatory annotations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2118 whole_genome_fetch_reg($config, $vf_hash, $chr)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2119 if defined($config->{regulatory})
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2120 && !defined($config->{no_consequences});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2121
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2122 # structural variations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2123 @finished_vfs = @{whole_genome_fetch_sv($config, \@svfs, $chr)}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2124 if scalar @svfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2125
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2126 # sort results into @finished_vfs array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2127 foreach my $chunk(keys %{$vf_hash->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2128 foreach my $pos(keys %{$vf_hash->{$chr}{$chunk}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2130 # pinch slice from slice cache if we don't already have it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2131 $_->{slice} ||= $slice_cache->{$chr} for @{$vf_hash->{$chr}{$chunk}{$pos}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2132
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2133 if(defined($config->{regulatory})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2134 foreach my $type(@REG_FEAT_TYPES) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2135 $_->{regulation_variations}->{$type} ||= [] for @{$vf_hash->{$chr}{$chunk}{$pos}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2136 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2137 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2139 if(defined($config->{custom})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2140 $_->{custom} ||= {} for @{$vf_hash->{$chr}{$chunk}{$pos}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2141 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2143 $_->{transcript_variations} ||= {} for @{$vf_hash->{$chr}{$chunk}{$pos}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2145 # add to final array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2146 push @finished_vfs, @{$vf_hash->{$chr}{$chunk}{$pos}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2147 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2148 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2150 # sort
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2151 @finished_vfs = sort {$a->{start} <=> $b->{start} || $a->{end} <=> $b->{end}} @finished_vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2153 # clean hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2154 delete $vf_hash->{$chr};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2155
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2156 return \@finished_vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2157 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2159 sub whole_genome_fetch_custom {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2160 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2161 my $vf_hash = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2162 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2164 return unless scalar @{$config->{custom}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2165
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2166 # create regions based on VFs instead of chunks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2167 my $tmp_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2169 foreach my $chunk(keys %{$vf_hash->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2170 foreach my $pos(keys %{$vf_hash->{$chr}{$chunk}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2171 foreach my $vf(@{$vf_hash->{$chr}{$chunk}{$pos}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2172 push @{$tmp_regions->{$chr}}, ($vf->{start}-1).'-'.($vf->{end}+1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2173 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2174 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2175 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2176
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2177 return unless defined($tmp_regions->{$chr});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2178
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2179 # cache annotations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2180 my $annotation_cache = cache_custom_annotation($config, $tmp_regions, $chr);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2181
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2182 # count and report
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2183 my $total_annotations = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2184 $total_annotations += scalar keys %{$annotation_cache->{$chr}->{$_}} for keys %{$annotation_cache->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2185 debug("Retrieved $total_annotations custom annotations (", (join ", ", map {(scalar keys %{$annotation_cache->{$chr}->{$_}}).' '.$_} keys %{$annotation_cache->{$chr}}), ")");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2186
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2187 # compare annotations to variations in hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2188 debug("Analyzing custom annotations") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2189 my $total = scalar keys %{$vf_hash->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2190 my $i = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2192 foreach my $chunk(keys %{$vf_hash->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2193 progress($config, $i++, $total);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2194
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2195 foreach my $pos(keys %{$vf_hash->{$chr}{$chunk}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2196 foreach my $vf(@{$vf_hash->{$chr}{$chunk}{$pos}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2198 $vf->{custom} = get_custom_annotation($config, $vf, $annotation_cache);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2199 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2200 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2201 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2203 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2204 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2205
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2206 sub whole_genome_fetch_transcript {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2207 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2208 my $vf_hash = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2209 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2210
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2211 my $tr_cache = $config->{tr_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2212 my $slice_cache = $config->{slice_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2214 my $up_down_size = MAX_DISTANCE_FROM_TRANSCRIPT;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2215
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2216 # check we have defined regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2217 return unless defined($vf_hash->{$chr}) && defined($tr_cache->{$chr});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2218
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2219 # copy slice from transcript to slice cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2220 $slice_cache = build_slice_cache($config, $tr_cache) unless defined($slice_cache->{$chr});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2222 debug("Analyzing variants") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2223
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2224 my $tr_counter = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2225 my $tr_count = scalar @{$tr_cache->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2226
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2227 while($tr_counter < $tr_count) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2228
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2229 progress($config, $tr_counter, $tr_count);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2230 print PARENT "BUMP\n" if defined($config->{forked});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2231
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2232 my $tr = $tr_cache->{$chr}->[$tr_counter++];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2234 # do each overlapping VF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2235 my $s = $tr->start - $up_down_size;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2236 my $e = $tr->end + $up_down_size;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2238 # get the chunks this transcript overlaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2239 my %chunks;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2240 $chunks{$_} = 1 for (int($s/$config->{chunk_size})..int($e/$config->{chunk_size}));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2241 map {delete $chunks{$_} unless defined($vf_hash->{$chr}{$_})} keys %chunks;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2242
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2243 # pointer to previous VF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2244 # used to tell plugins this is the last variant analysed in this transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2245 my $previous_vf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2247 foreach my $chunk(keys %chunks) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2248 foreach my $vf(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2249 grep {$_->{start} <= $e && $_->{end} >= $s}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2250 map {@{$vf_hash->{$chr}{$chunk}{$_}}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2251 keys %{$vf_hash->{$chr}{$chunk}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2252 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2253 # pinch slice from slice cache if we don't already have it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2254 $vf->{slice} ||= $slice_cache->{$chr};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2255
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2256 my $tv = Bio::EnsEMBL::Variation::TranscriptVariation->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2257 -transcript => $tr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2258 -variation_feature => $vf,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2259 -adaptor => $config->{tva},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2260 -no_ref_check => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2261 -no_transfer => 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2262 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2263
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2264 # prefetching stuff here prevents doing loads at the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2265 # end and makes progress reporting more useful
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2266 $tv->_prefetch_for_vep;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2268 $vf->add_TranscriptVariation($tv);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2269
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2270 # cache VF on the transcript if it is an unbalanced sub
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2271 push @{$tr->{indels}}, $vf if defined($vf->{indel});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2272
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2273 if(defined($config->{individual})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2275 # store VF on transcript, weaken reference to avoid circularity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2276 push @{$tr->{vfs}}, $vf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2277 weaken($tr->{vfs}->[-1]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2278
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2279 delete $previous_vf->{last_in_transcript}->{$tr->stable_id};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2280 $vf->{last_in_transcript}->{$tr->stable_id} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2281 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2282
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2283 $previous_vf = $vf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2284 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2285 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2286 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2288 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2289 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2290
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2291 sub whole_genome_fetch_reg {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2292 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2293 my $vf_hash = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2294 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2296 my $rf_cache = $config->{rf_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2297 my $slice_cache = $config->{slice_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2299 foreach my $type(keys %{$rf_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2300 debug("Analyzing ".$type."s") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2301
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2302 my $constructor = 'Bio::EnsEMBL::Variation::'.$type.'Variation';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2303
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2304 my $rf_counter = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2305 my $rf_count = scalar @{$rf_cache->{$chr}->{$type}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2307 while($rf_counter < $rf_count) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2309 progress($config, $rf_counter, $rf_count);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2310 print PARENT "BUMP\n" if defined($config->{forked});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2312 my $rf = $rf_cache->{$chr}->{$type}->[$rf_counter++];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2314 # do each overlapping VF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2315 my $s = $rf->{start};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2316 my $e = $rf->{end};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2317
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2318 # get the chunks this transcript overlaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2319 my %chunks;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2320 $chunks{$_} = 1 for (int($s/$config->{chunk_size})..int($e/$config->{chunk_size}));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2321 map {delete $chunks{$_} unless defined($vf_hash->{$chr}{$_})} keys %chunks;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2323 foreach my $chunk(keys %chunks) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2324 foreach my $vf(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2325 grep {$_->{start} <= $e && $_->{end} >= $s}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2326 map {@{$vf_hash->{$chr}{$chunk}{$_}}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2327 keys %{$vf_hash->{$chr}{$chunk}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2328 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2329 push @{$vf->{regulation_variations}->{$type}}, $constructor->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2330 -variation_feature => $vf,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2331 -feature => $rf,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2332 -no_ref_check => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2333 -no_transfer => 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2334 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2335 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2336 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2337 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2338
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2339 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2340 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2341 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2342
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2343 sub whole_genome_fetch_sv {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2344 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2345 my $svfs = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2346 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2347
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2348 my $tr_cache = $config->{tr_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2349 my $rf_cache = $config->{rf_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2350 my $slice_cache = $config->{slice_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2352 debug("Analyzing structural variations") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2354 my($i, $total) = (0, scalar @$svfs);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2355
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2356 my @finished_vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2357
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2358 foreach my $svf(@$svfs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2359 progress($config, $i++, $total);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2360
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2361 my %done_genes = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2362
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2363 if(defined($tr_cache->{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2364 foreach my $tr(grep {overlap($_->{start} - MAX_DISTANCE_FROM_TRANSCRIPT, $_->{end} + MAX_DISTANCE_FROM_TRANSCRIPT, $svf->{start}, $svf->{end})} @{$tr_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2365 my $svo = Bio::EnsEMBL::Variation::TranscriptStructuralVariation->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2366 -transcript => $tr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2367 -structural_variation_feature => $svf,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2368 -no_transfer => 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2369 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2371 $svf->add_TranscriptStructuralVariation($svo);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2372 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2373 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2374
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2375 $svf->{transcript_structural_variations} ||= {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2377 # do regulatory features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2378 if(defined($config->{regulatory}) && defined($rf_cache->{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2379 foreach my $rf_type(qw/RegulatoryFeature/) {#keys %{$rf_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2380 foreach my $rf(grep {$_->{start} <= $svf->{end} && $_->end >= $svf->{end}} @{$rf_cache->{$chr}->{$rf_type}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2381 my $svo = Bio::EnsEMBL::Variation::StructuralVariationOverlap->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2382 -feature => $rf,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2383 -structural_variation_feature => $svf,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2384 -no_transfer => 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2385 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2386
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2387 push @{$svf->{regulation_structural_variations}->{$rf_type}}, $svo;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2388 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2389
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2390 $svf->{regulation_structural_variations}->{$rf_type} ||= [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2391 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2392 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2393
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2394 # sort them
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2395 #$svf->_sort_svos;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2396 push @finished_vfs, $svf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2397 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2398
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2399 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2400
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2401 return \@finished_vfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2402 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2403
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2404 # retrieves transcripts given region list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2405 sub fetch_transcripts {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2406 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2407 my $regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2408 my $trim_regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2409
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2410 my $tr_cache = $config->{tr_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2411 my $slice_cache = $config->{slice_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2412
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2413 my ($count_from_mem, $count_from_db, $count_from_cache, $count_duplicates, $count_trimmed) = (0, 0, 0, 0, 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2415 my %seen_trs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2416
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2417 $count_from_mem = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2418 my $region_count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2419 foreach my $chr(keys %{$regions}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2420 $count_from_mem += scalar @{$tr_cache->{$chr}} if defined($tr_cache->{$chr}) && ref($tr_cache->{$chr}) eq 'ARRAY';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2421 $region_count += scalar @{$regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2422 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2423
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2424 my $counter;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2425
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2426 debug("Reading transcript data from cache and/or database") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2427
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2428 foreach my $chr(keys %{$regions}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2429 foreach my $region(sort {(split /\-/, $a)[0] <=> (split /\-/, $b)[1]} @{$regions->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2430 progress($config, $counter++, $region_count);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2431
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2432 # skip regions beyond the end of the chr
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2433 next if defined($slice_cache->{$chr}) && (split /\-/, $region)[0] > $slice_cache->{$chr}->length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2435 next if defined($config->{loaded_tr}->{$chr}->{$region});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2436
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2437 # force quiet so other methods don't mess up the progress bar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2438 my $quiet = $config->{quiet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2439 $config->{quiet} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2440
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2441 # try and load cache from disk if using cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2442 my $tmp_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2443 if(defined($config->{cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2444 #$tmp_cache = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2445 # defined($config->{tabix}) ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2446 # load_dumped_transcript_cache_tabix($config, $chr, $region, $trim_regions) :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2447 # load_dumped_transcript_cache($config, $chr, $region)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2448 #);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2449 $tmp_cache = load_dumped_transcript_cache($config, $chr, $region);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2450 $count_from_cache += scalar @{$tmp_cache->{$chr}} if defined($tmp_cache->{$chr});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2451 $config->{loaded_tr}->{$chr}->{$region} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2452 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2453
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2454 # no cache found on disk or not using cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2455 if(!defined($tmp_cache->{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2456
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2457 if(defined($config->{offline})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2458 # restore quiet status
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2459 $config->{quiet} = $quiet;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2460
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2461 debug("WARNING: Could not find cache for $chr\:$region") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2462 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2463 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2464
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2465 # spoof temporary region hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2466 my $tmp_hash;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2467 push @{$tmp_hash->{$chr}}, $region;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2468
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2469 $tmp_cache = cache_transcripts($config, $tmp_hash);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2470
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2471 # make it an empty arrayref that gets cached
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2472 # so we don't get confused and reload next time round
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2473 $tmp_cache->{$chr} ||= [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2474
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2475 $count_from_db += scalar @{$tmp_cache->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2476
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2477 # dump to disk if writing to cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2478 (defined($config->{tabix}) ? dump_transcript_cache_tabix($config, $tmp_cache, $chr, $region) : dump_transcript_cache($config, $tmp_cache, $chr, $region)) if defined($config->{write_cache});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2479
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2480 $config->{loaded_tr}->{$chr}->{$region} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2481 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2482
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2483 # add loaded transcripts to main cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2484 if(defined($tmp_cache->{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2485 while(my $tr = shift @{$tmp_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2486
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2487 # track already added transcripts by dbID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2488 my $dbID = $tr->dbID;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2489 if($seen_trs{$dbID}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2490 $count_duplicates++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2491 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2492 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2493
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2494 # trim out?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2495 #if(defined($trim_regions) && defined($trim_regions->{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2496 # my $tmp_count = scalar grep {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2497 # overlap(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2498 # (split /\-/, $_)[0], (split /\-/, $_)[1],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2499 # $tr->{start}, $tr->{end}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2500 # )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2501 # } @{$trim_regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2502 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2503 # if(!$tmp_count) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2504 # $count_trimmed++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2505 # next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2506 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2507 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2508
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2509 $seen_trs{$dbID} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2510
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2511 push @{$tr_cache->{$chr}}, $tr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2512 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2513 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2514
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2515 $tr_cache->{$chr} ||= [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2516
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2517 undef $tmp_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2518
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2519 # restore quiet status
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2520 $config->{quiet} = $quiet;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2521
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2522 # build slice cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2523 $slice_cache = build_slice_cache($config, $tr_cache) unless defined($slice_cache->{$chr});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2524 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2525 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2526
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2527 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2528
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2529 my $tr_count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2530 $tr_count += scalar @{$tr_cache->{$_}} for keys %$tr_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2531
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2532 debug("Retrieved $tr_count transcripts ($count_from_mem mem, $count_from_cache cached, $count_from_db DB, $count_duplicates duplicates)") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2533
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2534 return $tr_count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2535 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2537 sub fetch_regfeats {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2538 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2539 my $regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2540 my $trim_regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2541
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2542 my $rf_cache = $config->{rf_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2543 my $slice_cache = $config->{slice_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2544
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2545 my ($count_from_mem, $count_from_db, $count_from_cache, $count_duplicates, $count_trimmed) = (0, 0, 0, 0, 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2546
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2547 my %seen_rfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2548
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2549 $count_from_mem = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2550 my $region_count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2551
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2552 foreach my $chr(keys %$regions) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2553 if(defined($rf_cache->{$chr}) && ref($rf_cache->{$chr}) eq 'HASH') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2554 $count_from_mem += scalar @{$rf_cache->{$chr}->{$_}} for keys %{$rf_cache->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2555 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2556 $region_count += scalar @{$regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2557 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2558
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2559 my $counter = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2560
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2561 debug("Reading regulatory data from cache and/or database") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2562
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2563 foreach my $chr(keys %$regions) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2564 foreach my $region(sort {(split /\-/, $a)[0] cmp (split /\-/, $b)[1]} @{$regions->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2565 progress($config, $counter++, $region_count);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2566
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2567 next if defined($config->{loaded_rf}->{$chr}->{$region});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2568
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2569 # skip regions beyond the end of the chr
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2570 next if defined($slice_cache->{$chr}) && (split /\-/, $region)[0] > $slice_cache->{$chr}->length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2571
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2572 # force quiet so other methods don't mess up the progress bar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2573 my $quiet = $config->{quiet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2574 $config->{quiet} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2575
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2576 # try and load cache from disk if using cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2577 my $tmp_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2578 if(defined($config->{cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2579 $tmp_cache = load_dumped_reg_feat_cache($config, $chr, $region);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2580
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2581 #$tmp_cache =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2582 # defined($config->{tabix}) ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2583 # load_dumped_reg_feat_cache_tabix($config, $chr, $region, $trim_regions) :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2584 # load_dumped_reg_feat_cache($config, $chr, $region);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2585
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2586
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2587 if(defined($tmp_cache->{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2588 $count_from_cache += scalar @{$tmp_cache->{$chr}->{$_}} for keys %{$tmp_cache->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2589 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2590
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2591 # flag as loaded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2592 $config->{loaded_rf}->{$chr}->{$region} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2593 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2594
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2595 # no cache found on disk or not using cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2596 if(!defined($tmp_cache->{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2597
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2598 if(defined($config->{offline})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2599
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2600 # restore quiet status
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2601 $config->{quiet} = $quiet;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2602
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2603 debug("WARNING: Could not find cache for $chr\:$region") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2604 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2605 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2606
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2607 # spoof temporary region hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2608 my $tmp_hash;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2609 push @{$tmp_hash->{$chr}}, $region;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2610
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2611 $tmp_cache = cache_reg_feats($config, $tmp_hash);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2612
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2613 # make it an empty arrayref that gets cached
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2614 # so we don't get confused and reload next time round
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2615 $tmp_cache->{$chr} ||= {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2616
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2617 $count_from_db += scalar @{$tmp_cache->{$chr}->{$_}} for keys %{$tmp_cache->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2618
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2619 # dump to disk if writing to cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2620 #dump_reg_feat_cache($config, $tmp_cache, $chr, $region) if defined($config->{write_cache});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2621 (defined($config->{tabix}) ? dump_reg_feat_cache_tabix($config, $tmp_cache, $chr, $region) : dump_reg_feat_cache($config, $tmp_cache, $chr, $region)) if defined($config->{write_cache});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2622
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2623 # restore deleted coord_system adaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2624 foreach my $type(keys %{$tmp_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2625 $_->{slice}->{coord_system}->{adaptor} = $config->{csa} for @{$tmp_cache->{$chr}->{$type}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2626 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2628 # flag as loaded
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2629 $config->{loaded_rf}->{$chr}->{$region} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2630 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2631
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2632 # add loaded reg_feats to main cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2633 if(defined($tmp_cache->{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2634 foreach my $type(keys %{$tmp_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2635 while(my $rf = shift @{$tmp_cache->{$chr}->{$type}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2636
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2637 # filter on cell type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2638 if(defined($config->{cell_type}) && scalar(@{$config->{cell_type}})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2639 next unless grep {$rf->{cell_types}->{$_}} @{$config->{cell_type}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2640 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2641
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2642 # trim out?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2643 #if(defined($trim_regions) && defined($trim_regions->{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2644 # my $tmp_count = scalar grep {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2645 # overlap(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2646 # (split /\-/, $_)[0], (split /\-/, $_)[1],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2647 # $rf->{start}, $rf->{end}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2648 # )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2649 # } @{$trim_regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2650 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2651 # if(!$tmp_count) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2652 # $count_trimmed++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2653 # next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2654 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2655 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2656
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2657 # track already added reg_feats by dbID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2658 my $dbID = $rf->{dbID};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2659 if($seen_rfs{$dbID}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2660 $count_duplicates++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2661 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2662 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2663 $seen_rfs{$dbID} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2664
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2665 push @{$rf_cache->{$chr}->{$type}}, $rf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2666 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2667 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2668 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2669
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2670 undef $tmp_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2671
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2672 # restore quiet status
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2673 $config->{quiet} = $quiet;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2674 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2675 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2676
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2677 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2678
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2679 my $rf_count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2680
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2681 foreach my $chr(keys %$rf_cache) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2682 foreach my $type(keys %{$rf_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2683 $rf_count += scalar @{$rf_cache->{$chr}->{$type}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2684 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2685 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2686
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2687 debug("Retrieved $rf_count regulatory features ($count_from_mem mem, $count_from_cache cached, $count_from_db DB, $count_duplicates duplicates)") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2689 return $rf_count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2690 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2691
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2692 # gets existing VFs for a vf_hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2693 sub check_existing_hash {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2694 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2695 my $vf_hash = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2696 my $variation_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2697
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2698 # we only care about non-SVs here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2699 my %new_hash;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2700
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2701 foreach my $chr(keys %{$vf_hash}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2702 foreach my $chunk(keys %{$vf_hash->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2703 foreach my $pos(keys %{$vf_hash->{$chr}->{$chunk}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2704 foreach my $var(grep {$_->isa('Bio::EnsEMBL::Variation::VariationFeature')} @{$vf_hash->{$chr}->{$chunk}->{$pos}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2705 push @{$new_hash{$chr}->{$chunk}->{$pos}}, $var;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2706 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2707 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2708 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2709 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2710
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2711 $vf_hash = \%new_hash;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2712
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2713 debug("Checking for existing variations") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2714
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2715 my ($chunk_count, $counter);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2716 $chunk_count += scalar keys %{$vf_hash->{$_}} for keys %{$vf_hash};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2717
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2718 foreach my $chr(keys %{$vf_hash}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2719
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2720 my %loaded_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2721
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2722 foreach my $chunk(keys %{$vf_hash->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2723 progress($config, $counter++, $chunk_count);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2724
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2725 # get the VFs for this chunk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2726 my ($start, $end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2727
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2728 # work out start and end using chunk_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2729 $start = $config->{chunk_size} * $chunk;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2730 $end = $config->{chunk_size} * ($chunk + 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2731
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2732 # using cache?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2733 if(defined($config->{cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2734 my $tmp_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2735 push @{$tmp_regions->{$chr}}, $start.'-'.$end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2736
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2737 my $converted_regions = convert_regions($config, $tmp_regions);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2738
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2739 foreach my $region(@{$converted_regions->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2740
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2741 unless($loaded_regions{$region}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2742 my $tmp_cache = load_dumped_variation_cache($config, $chr, $region);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2743
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2744 # load from DB if not found in cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2745 if(!defined($tmp_cache->{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2746 if(defined($config->{offline})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2747 debug("WARNING: Could not find variation cache for $chr\:$region") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2748 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2749 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2750
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2751 $tmp_cache->{$chr} = get_variations_in_region($config, $chr, $region);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2752 dump_variation_cache($config, $tmp_cache, $chr, $region) if defined($config->{write_cache});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2753 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2754
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2755 # merge tmp_cache with the main cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2756 foreach my $key(keys %{$tmp_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2757 $variation_cache->{$chr}->{$key} = $tmp_cache->{$chr}->{$key};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2758 delete $tmp_cache->{$chr}->{$key};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2759 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2760
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2761 # clear memory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2762 undef $tmp_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2763
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2764 # record this region as fetched
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2765 $loaded_regions{$region} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2766 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2767 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2768 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2769
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2770 # no cache, get all variations in region from DB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2771 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2772
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2773 my ($min, $max);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2774
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2775 # we can fetch smaller region when using DB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2776 foreach my $pos(keys %{$vf_hash->{$chr}->{$chunk}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2777 foreach my $var(@{$vf_hash->{$chr}->{$chunk}->{$pos}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2778 foreach my $coord(qw(start end)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2779 $min = $var->{$coord} if !defined($min) || $var->{$coord} < $min;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2780 $max = $var->{$coord} if !defined($max) || $var->{$coord} > $max;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2781 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2782 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2783 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2784
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2785 $variation_cache->{$chr} = get_variations_in_region($config, $chr, $min.'-'.$max);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2786 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2787
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2788 # now compare retrieved vars with vf_hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2789 foreach my $pos(keys %{$vf_hash->{$chr}->{$chunk}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2790 foreach my $var(@{$vf_hash->{$chr}->{$chunk}->{$pos}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2791 my @found;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2792 my @gmafs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2793
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2794 if(defined($variation_cache->{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2795 if(my $existing_vars = $variation_cache->{$chr}->{$pos}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2796 foreach my $existing_var(grep {$_->[1] <= $config->{failed}} @$existing_vars) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2797 unless(is_var_novel($config, $existing_var, $var)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2798 push @found, $existing_var->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2799 push @gmafs, $existing_var->[6].":".$existing_var->[7] if defined($existing_var->[6]) && defined($existing_var->[7]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2800 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2801 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2802 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2803 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2804
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2805 $var->{existing} = join ",", @found;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2806 $var->{existing} ||= '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2807
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2808 $var->{gmaf} = join ",", @gmafs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2809 $var->{gmaf} ||= undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2810 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2811 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2812 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2813
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2814 delete $variation_cache->{$chr};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2815 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2816
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2817 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2818 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2819
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2820 # gets overlapping SVs for a vf_hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2821 sub check_svs_hash {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2822 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2823 my $vf_hash = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2824
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2825 debug("Checking for overlapping structural variations") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2826
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2827 my ($chunk_count, $counter);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2828 $chunk_count += scalar keys %{$vf_hash->{$_}} for keys %{$vf_hash};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2829
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2830 foreach my $chr(keys %{$vf_hash}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2831 foreach my $chunk(keys %{$vf_hash->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2832
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2833 progress($config, $counter++, $chunk_count);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2834
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2835 # work out start and end using chunk_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2836 my ($start, $end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2837 $start = $config->{chunk_size} * $chunk;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2838 $end = $config->{chunk_size} * ($chunk + 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2839
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2840 # check for structural variations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2841 if(defined($config->{sa})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2842 my $slice = $config->{sa}->fetch_by_region('chromosome', $chr, $start, $end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2843
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2844 if(defined($slice)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2845 my $svs = $config->{svfa}->fetch_all_by_Slice($slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2846
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2847 foreach my $pos(keys %{$vf_hash->{$chr}->{$chunk}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2848 foreach my $var(@{$vf_hash->{$chr}->{$chunk}->{$pos}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2849 my $string = join ",",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2850 map {$_->variation_name}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2851 grep {$_->seq_region_start <= $var->end && $_->seq_region_end >= $var->start}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2852 @$svs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2853
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2854 $var->{overlapping_svs} = $string if $string;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2855 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2856 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2857 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2858 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2859 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2860 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2861
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2862 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2863 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2864
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2865 # gets a slice from the slice adaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2866 sub get_slice {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2867 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2868 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2869 my $otherfeatures = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2870 $otherfeatures ||= '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2871
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2872 return undef unless defined($config->{sa}) && defined($chr);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2873
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2874 my $slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2875
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2876 # first try to get a chromosome
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2877 eval { $slice = $config->{$otherfeatures.'sa'}->fetch_by_region('chromosome', $chr); };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2878
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2879 # if failed, try to get any seq region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2880 if(!defined($slice)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2881 $slice = $config->{$otherfeatures.'sa'}->fetch_by_region(undef, $chr);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2882 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2883
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2884 return $slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2885 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2886
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2887
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2888
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2889
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2890 # METHODS THAT DEAL WITH "REGIONS"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2891 ##################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2892
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2893 # gets regions from VF hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2894 sub regions_from_hash {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2895 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2896 my $vf_hash = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2897
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2898 my %include_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2899
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2900 # if using cache we just want the regions of cache_region_size
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2901 # since that's what we'll get from the cache (or DB if no cache found)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2902 if(defined($config->{cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2903
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2904 my $region_size = $config->{cache_region_size};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2905
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2906 foreach my $chr(keys %$vf_hash) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2907 $include_regions{$chr} = [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2908 my %temp_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2909
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2910 foreach my $chunk(keys %{$vf_hash->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2911 foreach my $pos(keys %{$vf_hash->{$chr}{$chunk}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2912 my ($s, $e) = ($pos - MAX_DISTANCE_FROM_TRANSCRIPT, $pos + MAX_DISTANCE_FROM_TRANSCRIPT);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2913
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2914 my $low = int ($s / $region_size);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2915 my $high = int ($e / $region_size) + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2916
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2917 for my $i($low..($high - 1)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2918 $temp_regions{(($i * $region_size) + 1).'-'.(($i + 1) * $region_size)} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2919 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2920 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2921 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2922
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2923 @{$include_regions{$chr}} = keys %temp_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2924 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2925 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2926
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2927 # if no cache we don't want to fetch more than is necessary, so find the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2928 # minimum covered region of the variations in the hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2929 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2930 foreach my $chr(keys %$vf_hash) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2931 $include_regions{$chr} = [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2932
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2933 foreach my $chunk(keys %{$vf_hash->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2934 foreach my $pos(keys %{$vf_hash->{$chr}{$chunk}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2935 add_region($_->start, $_->end, $include_regions{$chr}) for @{$vf_hash->{$chr}{$chunk}{$pos}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2936 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2937 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2938 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2939
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2940 # merge regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2941 merge_regions(\%include_regions, $config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2942 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2943
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2944 return \%include_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2945 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2946
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2947 # adds a region to region list, expanding existing one if overlaps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2948 sub add_region {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2949 my $start = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2950 my $end = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2951 my $region_list = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2952
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2953 # fix end for insertions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2954 $end = $start if $end < $start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2955
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2956 my $added = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2957 my $i = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2958
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2959 while ($i < scalar @$region_list) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2960 my ($region_start, $region_end) = split /\-/, $region_list->[$i];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2961
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2962 if($start <= $region_end && $end >= $region_start) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2963 my $new_region_start = ($start < $end ? $start : $end) - MAX_DISTANCE_FROM_TRANSCRIPT;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2964 my $new_region_end = ($start > $end ? $start : $end) + MAX_DISTANCE_FROM_TRANSCRIPT;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2965
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2966 $new_region_start = 1 if $new_region_start < 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2967
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2968 $region_start = $new_region_start if $new_region_start < $region_start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2969 $region_end = $new_region_end if $new_region_end > $region_end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2970
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2971 $region_list->[$i] = $region_start.'-'.$region_end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2972 $added = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2973 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2974
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2975 $i++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2976 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2977
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2978 unless($added) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2979 my $s = $start - MAX_DISTANCE_FROM_TRANSCRIPT;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2980 $s = 1 if $s < 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2981
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2982 push @{$region_list}, $s.'-'.($end + MAX_DISTANCE_FROM_TRANSCRIPT);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2983 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2984 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2985
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2986 # merges overlapping regions from scans
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2987 sub merge_regions {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2988 my $include_regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2989 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2990 my $consecutive = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2991 $consecutive ||= 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2992
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2993 # now merge overlapping regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2994 foreach my $chr(keys %$include_regions) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2995 my $max_index = $#{$include_regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2996 my (@new_regions, %skip);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2997
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2998 for my $i(0..$max_index) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2999 next if $skip{$i};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3000 my ($s, $e) = split /\-/, $include_regions->{$chr}[$i];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3001
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3002 for my $j(($i+1)..$max_index) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3003 next if $skip{$j};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3004 my ($ns, $ne) = split /\-/, $include_regions->{$chr}[$j];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3005
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3006 if($s <= ($ne + $consecutive) && $e >= ($ns - $consecutive)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3007 $s = $ns if $ns < $s;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3008 $e = $ne if $ne > $e;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3009
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3010 $skip{$j} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3011 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3012 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3013
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3014 push @new_regions, $s.'-'.$e;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3015 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3016
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3017 # replace original
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3018 $include_regions->{$chr} = \@new_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3019
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3020 $config->{region_count} += scalar @new_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3021 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3022
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3023 return $include_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3024 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3025
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3026 # converts regions as determined by scan_file to regions loadable from cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3027 sub convert_regions {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3028 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3029 my $regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3030
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3031 return undef unless defined $regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3032
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3033 my $region_size = $config->{cache_region_size};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3034
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3035 my %new_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3036
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3037 foreach my $chr(keys %$regions) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3038 my %temp_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3039
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3040 foreach my $region(@{$regions->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3041 my ($s, $e) = split /\-/, $region;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3042
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3043 my $low = int ($s / $region_size);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3044 my $high = int ($e / $region_size) + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3045
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3046 for my $i($low..($high - 1)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3047 $temp_regions{(($i * $region_size) + 1).'-'.(($i + 1) * $region_size)} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3048 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3049 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3050
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3051 @{$new_regions{$chr}} = keys %temp_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3052 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3053
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3054 return \%new_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3055 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3056
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3057
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3058
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3059
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3060
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3061 # CACHE METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3062 ###############
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3063
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3064 # prunes a cache to get rid of features not in regions in use
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3065 sub prune_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3066 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3067 my $cache = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3068 my $regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3069 my $loaded = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3070
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3071 # delete no longer in use chroms
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3072 foreach my $chr(keys %$cache) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3073 delete $cache->{$chr} unless defined $regions->{$chr} && scalar @{$regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3074 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3075
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3076 my $new_count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3077
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3078 foreach my $chr(keys %$cache) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3079
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3080 # get total area spanned by regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3081 my ($min, $max);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3082 foreach my $region(@{$regions->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3083 my ($s, $e) = split /\-/, $region;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3084 $min = $s if !defined($min) or $s < $min;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3085 $max = $e if !defined($max) or $e > $max;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3086 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3087
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3088 # transcript cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3089 if(ref($cache->{$chr}) eq 'ARRAY') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3090 $cache->{$chr} = prune_min_max($cache->{$chr}, $min, $max);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3091 $new_count += scalar @{$cache->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3092 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3093 # regfeat cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3094 elsif(ref($cache->{$chr}) eq 'HASH') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3095 for(keys %{$cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3096 $cache->{$chr}->{$_} = prune_min_max($cache->{$chr}->{$_}, $min, $max);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3097 $new_count += scalar @{$cache->{$chr}->{$_}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3098 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3099 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3101 # update loaded regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3102 my %have_regions = map {$_ => 1} @{$regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3104 foreach my $region(keys %{$loaded->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3105 delete $loaded->{$chr}->{$region} unless defined $have_regions{$region};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3106 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3107 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3108
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3109 return $new_count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3110 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3111
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3112 # does the actual pruning
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3113 sub prune_min_max {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3114 my $array = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3115 my $min = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3116 my $max = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3118 # splice out features not in area spanned by min/max
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3119 my $i = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3120 my $f_count = scalar @{$array};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3121 my @new_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3122
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3123 while($i < $f_count) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3124 my $f = $array->[$i];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3125
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3126 $i++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3127
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3128 if($max - $f->start() > 0 && $f->end - $min > 0) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3129 push @new_cache, $f;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3130 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3132 # do some cleaning for transcripts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3133 elsif(defined $f->{translation}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3134 delete $f->{translation}->{transcript};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3135 delete $f->{translation};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3136 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3137 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3139 undef $array;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3140 return \@new_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3141 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3143 # get transcripts for slices
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3144 sub cache_transcripts {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3145 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3146 my $include_regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3147
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3148 my $tr_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3149 my $i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3151 debug("Caching transcripts") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3153 foreach my $chr(keys %$include_regions) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3155 my $slice = get_slice($config, $chr);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3157 next unless defined $slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3159 # prefetch some things
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3160 $slice->is_circular;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3162 # trim bumf off the slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3163 delete $slice->{coord_system}->{adaptor} if defined($config->{write_cache});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3165 # no regions?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3166 if(!scalar @{$include_regions->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3167 my $start = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3168 my $end = $config->{cache_region_size};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3169
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3170 while($start < $slice->end) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3171 push @{$include_regions->{$chr}}, $start.'-'.$end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3172 $start += $config->{cache_region_size};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3173 $end += $config->{cache_region_size};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3174 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3175 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3176
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3177 my $region_count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3178
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3179 if(scalar keys %$include_regions == 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3180 my ($chr) = keys %$include_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3181 $region_count = scalar @{$include_regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3182 debug("Caching transcripts for chromosome $chr") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3183 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3184
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3185 foreach my $region(@{$include_regions->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3186 progress($config, $i++, $region_count || $config->{region_count});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3187
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3188 my ($s, $e) = split /\-/, $region;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3189
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3190 # sanity check start and end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3191 $s = 1 if $s < 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3192 $e = $slice->end if $e > $slice->end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3194 # get sub-slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3195 my $sub_slice = $slice->sub_Slice($s, $e);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3197 # for some reason unless seq is called here the sequence becomes Ns later
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3198 $sub_slice->seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3200 # add transcripts to the cache, via a transfer to the chrom's slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3201 if(defined($sub_slice)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3202 foreach my $gene(map {$_->transfer($slice)} @{$sub_slice->get_all_Genes(undef, undef, 1)}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3203 my $gene_stable_id = $gene->stable_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3204 my $canonical_tr_id = $gene->{canonical_transcript_id};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3205
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3206 my @trs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3207
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3208 foreach my $tr(@{$gene->get_all_Transcripts}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3209 $tr->{_gene_stable_id} = $gene_stable_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3210 $tr->{_gene} = $gene;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3212 # indicate if canonical
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3213 $tr->{is_canonical} = 1 if defined $canonical_tr_id and $tr->dbID eq $canonical_tr_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3214
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3215 if(defined($config->{prefetch})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3216 prefetch_transcript_data($config, $tr);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3217 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3218
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3219 # CCDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3220 elsif(defined($config->{ccds})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3221 my @entries = grep {$_->database eq 'CCDS'} @{$tr->get_all_DBEntries};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3222 $tr->{_ccds} = $entries[0]->display_id if scalar @entries;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3223 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3224
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3225 # strip some unnecessary data from the transcript object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3226 clean_transcript($tr) if defined($config->{write_cache});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3228 push @trs, $tr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3229 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3230
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3231 # sort the transcripts by translation so we can share sift/polyphen stuff
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3232 # between transcripts and save cache space
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3233 if(defined($config->{write_cache}) && (defined($config->{sift}) || defined($config->{polyphen}))) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3234
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3235 my $prev_tr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3236
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3237 # sort them by peptide seqeuence as transcripts with identical peptides
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3238 # will have identical SIFT/PolyPhen prediction strings
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3239 foreach my $tr(sort {$a->{_variation_effect_feature_cache}->{peptide} cmp $b->{_variation_effect_feature_cache}->{peptide}} grep {$_->{_variation_effect_feature_cache}->{peptide}} @trs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3241 if(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3242 defined($prev_tr) &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3243 $prev_tr->{_variation_effect_feature_cache}->{peptide}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3244 eq $tr->{_variation_effect_feature_cache}->{peptide}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3245 ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3247 foreach my $analysis(qw(sift polyphen)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3248 next unless defined($config->{$analysis});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3249 $tr->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis} = $prev_tr->{_variation_effect_feature_cache}->{protein_function_predictions}->{$analysis};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3250 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3251 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3252
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3253 $prev_tr = $tr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3254 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3255 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3256
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3257 # clean the gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3258 clean_gene($gene);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3259
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3260 push @{$tr_cache->{$chr}}, @trs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3261 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3262 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3263 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3264 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3265
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3266 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3268 return $tr_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3269 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3271 # gets rid of extra bits of info attached to the transcript that we don't need
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3272 sub clean_transcript {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3273 my $tr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3275 foreach my $key(qw(display_xref external_db external_display_name external_name external_status created_date status description edits_enabled modified_date dbentries is_current analysis transcript_mapper)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3276 delete $tr->{$key} if defined($tr->{$key});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3277 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3278
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3279 # clean all attributes but miRNA
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3280 if(defined($tr->{attributes})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3281 my @new_atts;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3282 foreach my $att(@{$tr->{attributes}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3283 push @new_atts, $att if $att->{code} eq 'miRNA';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3284 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3285 $tr->{attributes} = \@new_atts;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3286 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3288 # clean the translation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3289 if(defined($tr->translation)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3290
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3291 # sometimes the translation points to a different transcript?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3292 $tr->{translation}->{transcript} = $tr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3293 weaken($tr->{translation}->{transcript});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3294
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3295 for my $key(qw(attributes protein_features created_date modified_date)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3296 delete $tr->translation->{$key};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3297 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3298 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3299 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3300
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3301 # gets rid of extra bits of info attached to genes. At the moment this is almost
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3302 # everything as genes are only used for their locations when looking at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3303 # structural variations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3304 sub clean_gene {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3305 my $gene = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3307 # delete almost everything in the gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3308 map {delete $gene->{$_}}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3309 grep {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3310 $_ ne 'start' &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3311 $_ ne 'end' &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3312 $_ ne 'strand' &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3313 $_ ne 'stable_id'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3314 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3315 keys %{$gene};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3316 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3317
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3318 # build slice cache from transcript cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3319 sub build_slice_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3320 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3321 my $tr_cache = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3322
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3323 my %slice_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3324
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3325 foreach my $chr(keys %$tr_cache) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3326 $slice_cache{$chr} = scalar @{$tr_cache->{$chr}} ? $tr_cache->{$chr}[0]->slice : &get_slice($config, $chr);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3327
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3328 if(!defined($slice_cache{$chr})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3329 delete $slice_cache{$chr}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3330 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3331
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3332 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3333 # reattach adaptor to the coord system
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3334 $slice_cache{$chr}->{coord_system}->{adaptor} ||= $config->{csa};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3335 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3336 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3337
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3338 return \%slice_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3339 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3341 # pre-fetches per-transcript data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3342 sub prefetch_transcript_data {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3343 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3344 my $tr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3346 # introns
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3347 my $introns = $tr->get_all_Introns;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3348
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3349 if(defined($introns)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3350 foreach my $intron(@$introns) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3351 foreach my $key(qw(adaptor analysis dbID next prev seqname)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3352 delete $intron->{$key};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3353 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3354 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3355 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3356
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3357 $tr->{_variation_effect_feature_cache}->{introns} ||= $introns;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3359 # translateable_seq, mapper
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3360 $tr->{_variation_effect_feature_cache}->{translateable_seq} ||= $tr->translateable_seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3361 $tr->{_variation_effect_feature_cache}->{mapper} ||= $tr->get_TranscriptMapper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3362
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3363 # peptide
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3364 unless ($tr->{_variation_effect_feature_cache}->{peptide}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3365 my $translation = $tr->translate;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3366 $tr->{_variation_effect_feature_cache}->{peptide} = $translation ? $translation->seq : undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3367 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3368
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3369 # protein features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3370 if(defined($config->{domains}) || defined($config->{write_cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3371 my $pfs = $tr->translation ? $tr->translation->get_all_ProteinFeatures : [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3373 # clean them to save cache space
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3374 foreach my $pf(@$pfs) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3375
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3376 # remove everything but the coord, analysis and ID fields
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3377 foreach my $key(keys %$pf) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3378 delete $pf->{$key} unless
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3379 $key eq 'start' ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3380 $key eq 'end' ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3381 $key eq 'analysis' ||
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3382 $key eq 'hseqname';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3383 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3384
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3385 # remove everything from the analysis but the display label
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3386 foreach my $key(keys %{$pf->{analysis}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3387 delete $pf->{analysis}->{$key} unless $key eq '_display_label';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3388 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3389 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3390
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3391 $tr->{_variation_effect_feature_cache}->{protein_features} = $pfs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3392 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3393
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3394 # codon table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3395 unless ($tr->{_variation_effect_feature_cache}->{codon_table}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3396 # for mithocondrial dna we need to to use a different codon table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3397 my $attrib = $tr->slice->get_all_Attributes('codon_table')->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3398
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3399 $tr->{_variation_effect_feature_cache}->{codon_table} = $attrib ? $attrib->value : 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3400 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3401
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3402 # sift/polyphen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3403 if(defined($config->{pfpma}) && defined($tr->{_variation_effect_feature_cache}->{peptide})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3404 foreach my $analysis(qw(sift polyphen)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3405 next unless defined($config->{$analysis});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3406 my $a = $analysis;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3407 $a .= '_humvar' if $a eq 'polyphen';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3408 $tr->{_variation_effect_feature_cache}->{protein_function_predictions}->{$a} ||= $config->{pfpma}->fetch_by_analysis_translation_md5($a, md5_hex($tr->{_variation_effect_feature_cache}->{peptide}))
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3409 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3410 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3411
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3412 # gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3413 $tr->{_gene} ||= $config->{ga}->fetch_by_transcript_stable_id($tr->stable_id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3415 # gene HGNC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3416 if(defined $config->{hgnc}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3417
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3418 # get from gene cache if found already
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3419 if(defined($tr->{_gene}->{_hgnc})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3420 $tr->{_gene_hgnc} = $tr->{_gene}->{_hgnc};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3421 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3422 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3423 my @entries = grep {$_->database eq 'HGNC'} @{$tr->{_gene}->get_all_DBEntries()};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3424 if(scalar @entries) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3425 $tr->{_gene_hgnc} = $entries[0]->display_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3426 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3427
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3428 $tr->{_gene_hgnc} ||= '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3429
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3430 # cache it on the gene object too
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3431 $tr->{_gene}->{_hgnc} = $tr->{_gene_hgnc};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3432 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3433 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3435 # CCDS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3436 my @entries = grep {$_->database eq 'CCDS'} @{$tr->get_all_DBEntries};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3437 $tr->{_ccds} = $entries[0]->display_id if scalar @entries;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3438 $tr->{_ccds} ||= '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3439
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3440 # refseq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3441 @entries = grep {$_->database eq 'RefSeq_mRNA'} @{$tr->get_all_DBEntries};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3442 if(scalar @entries) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3443 $tr->{_refseq} = join ",", map {$_->display_id} @entries;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3444 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3445 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3446 $tr->{_refseq} = '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3447 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3448
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3449 # protein stable ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3450 $tr->{_protein} = $tr->translation ? $tr->translation->stable_id : '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3452 return $tr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3453 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3454
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3455 sub get_dump_file_name {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3456 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3457 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3458 my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3459 my $type = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3460
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3461 $type ||= 'transcript';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3462
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3463 if($type eq 'transcript') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3464 $type = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3465 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3466 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3467 $type = '_'.$type;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3468 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3469
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3470 #my ($s, $e) = split /\-/, $region;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3471 #my $subdir = int($s / 1e6);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3472 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3473 #my $dir = $config->{dir}.'/'.$chr.'/'.$subdir;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3474
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3475 my $dir = $config->{dir}.'/'.$chr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3476 my $dump_file = $dir.'/'.$region.$type.(defined($config->{tabix}) ? '_tabix' : '').'.gz';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3477
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3478 # make directory if it doesn't exist
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3479 if(!(-e $dir) && defined($config->{write_cache})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3480 make_path($dir);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3481 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3482
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3483 return $dump_file;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3484 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3485
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3486 # dumps out transcript cache to file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3487 sub dump_transcript_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3488 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3489 my $tr_cache = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3490 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3491 my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3492
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3493 debug("Dumping cached transcript data") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3494
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3495 # clean the slice adaptor before storing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3496 clean_slice_adaptor($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3498 strip_transcript_cache($config, $tr_cache);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3499
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3500 $config->{reg}->disconnect_all;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3501 delete $config->{sa}->{dbc}->{_sql_helper};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3502
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3503 my $dump_file = get_dump_file_name($config, $chr, $region, 'transcript');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3505 debug("Writing to $dump_file") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3506
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3507 # storable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3508 open my $fh, "| gzip -9 -c > ".$dump_file or die "ERROR: Could not write to dump file $dump_file";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3509 nstore_fd($tr_cache, $fh);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3510 close $fh;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3511 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3512
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3513 #sub dump_transcript_cache_tabix {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3514 # my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3515 # my $tr_cache = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3516 # my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3517 # my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3518 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3519 # debug("Dumping cached transcript data") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3520 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3521 # # clean the slice adaptor before storing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3522 # clean_slice_adaptor($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3523 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3524 # strip_transcript_cache($config, $tr_cache);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3525 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3526 # $config->{reg}->disconnect_all;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3527 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3528 # my $dir = $config->{dir}.'/'.$chr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3529 # my $dump_file = $dir.'/'.($region || "dump").'_tabix.gz';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3530 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3531 # # make directory if it doesn't exist
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3532 # if(!(-e $dir)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3533 # make_path($dir);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3534 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3535 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3536 # debug("Writing to $dump_file") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3537 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3538 # use Storable qw(nfreeze);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3539 # use MIME::Base64 qw(encode_base64);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3540 # #open NEW, "| bgzip -c > ".$dump_file or die "ERROR: Could not write to dump file $dump_file";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3541 # #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3542 # #foreach my $tr(sort {$a->start <=> $b->start} @{$tr_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3543 # # print NEW join "\t", (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3544 # # $chr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3545 # # $tr->start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3546 # # $tr->end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3547 # # encode_base64(freeze($tr), "")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3548 # # );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3549 # # print NEW "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3550 # #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3551 # #close NEW;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3552 # #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3553 # ## tabix it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3554 # #my $output = `tabix -s 1 -b 2 -e 3 -f $dump_file 2>&1`;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3555 # #die("ERROR: Failed during tabix indexing\n$output\n") if $output;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3556 # open NEW, "| gzip -9 -c > ".$dump_file or die "ERROR: Could not write to dump file $dump_file";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3557 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3558 # foreach my $tr(sort {$a->start <=> $b->start} @{$tr_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3559 # print NEW join "\t", (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3560 # $chr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3561 # $tr->start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3562 # $tr->end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3563 # encode_base64(freeze($tr), "")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3564 # );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3565 # print NEW "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3566 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3567 # close NEW;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3568 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3569
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3570 # loads in dumped transcript cache to memory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3571 sub load_dumped_transcript_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3572 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3573 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3574 my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3575
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3576 my $dump_file = get_dump_file_name($config, $chr, $region, 'transcript');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3577
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3578 return undef unless -e $dump_file;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3579
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3580 debug("Reading cached transcript data for chromosome $chr".(defined $region ? "\:$region" : "")." from dumped file") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3581
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3582 open my $fh, $config->{compress}." ".$dump_file." |" or return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3583 my $tr_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3584 $tr_cache = fd_retrieve($fh);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3585 close $fh;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3586
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3587 # reattach adaptors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3588 foreach my $t(@{$tr_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3589 if(defined($t->{translation})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3590 $t->{translation}->{adaptor} = $config->{tra} if defined $t->{translation}->{adaptor};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3591 $t->{translation}->{transcript} = $t;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3592 weaken($t->{translation}->{transcript});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3593 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3594
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3595 $t->{slice}->{adaptor} = $config->{sa};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3596 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3597
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3598 return $tr_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3599 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3601 #sub load_dumped_transcript_cache_tabix {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3602 # my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3603 # my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3604 # my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3605 # my $trim_regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3606 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3607 # my $dir = $config->{dir}.'/'.$chr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3608 # my $dump_file = $dir.'/'.($region || "dump").'_tabix.gz';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3609 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3610 # #print STDERR "Reading from $dump_file\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3611 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3612 # return undef unless -e $dump_file;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3613 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3614 # debug("Reading cached transcript data for chromosome $chr".(defined $region ? "\:$region" : "")." from dumped file") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3615 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3616 # my $tr_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3617 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3618 # use MIME::Base64 qw(decode_base64);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3619 # use Storable qw(thaw);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3620 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3621 # my ($s, $e) = split /\-/, $region;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3622 # my @regions = grep {overlap($s, $e, (split /\-/, $_))} @{$trim_regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3623 # my $regions = "";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3624 # $regions .= " $chr\:$_" for @regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3625 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3626 # #print STDERR "tabix $dump_file $regions |\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3627 # #open IN, "tabix $dump_file $regions |";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3628 # open IN, "gzip -dc $dump_file |";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3629 # while(<IN>) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3630 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3631 # #$DB::single = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3632 # my ($chr, $start, $end, $blob) = split /\t/, $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3633 # next unless grep {overlap($start, $end, (split /\-/, $_))} @regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3634 # my $tr = thaw(decode_base64($blob));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3635 # push @{$tr_cache->{$chr}}, $tr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3636 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3637 # close IN;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3638 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3639 # # reattach adaptors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3640 # foreach my $t(@{$tr_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3641 # if(defined($t->{translation})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3642 # $t->{translation}->{adaptor} = $config->{tra} if defined $t->{translation}->{adaptor};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3643 # $t->{translation}->{transcript} = $t;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3644 # weaken($t->{translation}->{transcript});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3645 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3646 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3647 # $t->{slice}->{adaptor} = $config->{sa};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3648 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3649 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3650 # # add empty array ref so code doesn't try and fetch from DB too
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3651 # $tr_cache->{$chr} ||= [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3652 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3653 # return $tr_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3654 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3655
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3656 # strips cache before writing to disk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3657 sub strip_transcript_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3658 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3659 my $cache = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3660
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3661 foreach my $chr(keys %$cache) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3662 foreach my $tr(@{$cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3663 foreach my $exon(@{$tr->{_trans_exon_array}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3664 delete $exon->{slice}->{adaptor};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3665
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3666 for(qw(adaptor created_date modified_date is_current version is_constitutive _seq_cache dbID slice)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3667 delete $exon->{$_};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3668 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3669 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3670
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3671 delete $tr->{adaptor};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3672 delete $tr->{slice}->{adaptor};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3673 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3674 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3675 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3676
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3677 # cleans slice adaptor before storing in cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3678 sub clean_slice_adaptor{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3679 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3680
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3681 # clean some stuff off the slice adaptor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3682 delete $config->{sa}->{asm_exc_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3683 $config->{sa}->{sr_name_cache} = {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3684 $config->{sa}->{sr_id_cache} = {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3685 delete $config->{sa}->{db}->{seq_region_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3686 delete $config->{sa}->{db}->{name_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3687 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3688
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3689
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3690 # dump adaptors to cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3691 sub dump_adaptor_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3692 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3693
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3694 $config->{reg}->disconnect_all;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3695 delete $config->{sa}->{dbc}->{_sql_helper};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3696
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3697 my $dir = $config->{dir};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3698 my $dump_file = $dir.'/adaptors.gz';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3699
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3700 # make directory if it doesn't exist
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3701 if(!(-e $dir)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3702 make_path($dir);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3703 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3704
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3705 open my $fh, "| gzip -9 -c > ".$dump_file or die "ERROR: Could not write to dump file $dump_file";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3706 nstore_fd($config, $fh);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3707 close $fh;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3708 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3709
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3710 # load dumped adaptors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3711 sub load_dumped_adaptor_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3712 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3713
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3714 my $dir = $config->{dir};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3715 my $dump_file = $dir.'/adaptors.gz';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3716
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3717 return undef unless -e $dump_file;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3718
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3719 debug("Reading cached adaptor data") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3720
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3721 open my $fh, $config->{compress}." ".$dump_file." |" or return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3722 my $cached_config;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3723 $cached_config = fd_retrieve($fh);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3724 close $fh;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3725
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3726 $config->{$_} = $cached_config->{$_} for qw(sa ga ta vfa svfa tva pfpma mca csa RegulatoryFeature_adaptor MotifFeature_adaptor);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3727
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3728 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3729 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3730
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3731 # dumps cached variations to disk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3732 sub dump_variation_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3733 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3734 my $v_cache = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3735 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3736 my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3737
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3738 my $dump_file = get_dump_file_name($config, $chr, $region, 'var');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3739
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3740 open DUMP, "| gzip -9 -c > ".$dump_file or die "ERROR: Could not write to adaptor dump file $dump_file";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3741
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3742 foreach my $pos(keys %{$v_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3743 foreach my $v(@{$v_cache->{$chr}->{$pos}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3744 my ($name, $failed, $start, $end, $as, $strand, $ma, $maf) = @$v;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3745
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3746 print DUMP join " ", (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3747 $name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3748 $failed == 0 ? '' : $failed,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3749 $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3750 $end == $start ? '' : $end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3751 $as,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3752 $strand == 1 ? '' : $strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3753 $ma || '',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3754 defined($maf) ? sprintf("%.2f", $maf) : '',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3755 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3756 print DUMP "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3757 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3758 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3759
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3760 close DUMP;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3761 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3762
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3763 # loads dumped variation cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3764 sub load_dumped_variation_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3765 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3766 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3767 my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3768
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3769 my $dump_file = get_dump_file_name($config, $chr, $region, 'var');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3770
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3771 return undef unless -e $dump_file;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3772
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3773 open DUMP, $config->{compress}." ".$dump_file." |" or return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3774
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3775 my $v_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3776
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3777 while(<DUMP>) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3778 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3779 my ($name, $failed, $start, $end, $as, $strand, $ma, $maf) = split / /, $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3780 $failed ||= 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3781 $end ||= $start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3782 $strand ||= 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3783 $ma ||= undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3784 $maf ||= undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3785
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3786 my @v = ($name, $failed, $start, $end, $as, $strand, $ma, $maf);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3787 push @{$v_cache->{$chr}->{$start}}, \@v;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3788 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3789
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3790 close DUMP;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3791
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3792 return $v_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3793 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3794
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3795 # caches regulatory features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3796 sub cache_reg_feats {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3797 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3798 my $include_regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3799
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3800 my $rf_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3801 my $i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3802
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3803 debug("Caching regulatory features") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3804
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3805 foreach my $chr(keys %$include_regions) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3806
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3807 my $slice = get_slice($config, $chr);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3808
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3809 next unless defined $slice;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3810
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3811 # prefetch some things
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3812 $slice->is_circular;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3813
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3814 # no regions?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3815 if(!scalar @{$include_regions->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3816 my $start = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3817 my $end = $config->{cache_region_size};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3818
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3819 while($start < $slice->end) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3820 push @{$include_regions->{$chr}}, $start.'-'.$end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3821 $start += $config->{cache_region_size};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3822 $end += $config->{cache_region_size};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3823 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3824 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3825
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3826 my $region_count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3827
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3828 if(scalar keys %$include_regions == 1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3829 my ($chr) = keys %$include_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3830 $region_count = scalar @{$include_regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3831 debug("Caching transcripts for chromosome $chr") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3832 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3833
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3834 foreach my $region(@{$include_regions->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3835 progress($config, $i++, $region_count || $config->{region_count});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3836
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3837 my ($s, $e) = split /\-/, $region;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3838
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3839 # sanity check start and end
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3840 $s = 1 if $s < 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3841 $e = $slice->end if $e > $slice->end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3842
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3843 # get sub-slice
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3844 my $sub_slice = $slice->sub_Slice($s, $e);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3845 $sub_slice->{coord_system}->{adaptor} = $config->{csa};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3846
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3847 next unless defined($sub_slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3848
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3849 foreach my $type(@REG_FEAT_TYPES) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3850 my $features = $config->{$type.'_adaptor'}->fetch_all_by_Slice($sub_slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3851 next unless defined($features);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3852
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3853 # cell types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3854 if(defined($config->{cell_type}) && scalar(@{$config->{cell_type}})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3855 foreach my $rf(@$features) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3856
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3857 my %cl;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3858
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3859 # get cell type by fetching all from stable ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3860 if($type eq 'RegulatoryFeature') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3861 %cl = map {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3862 $_->feature_set->cell_type->name => $_->feature_type->name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3863 } @{$rf->adaptor->fetch_all_by_stable_ID($rf->stable_id)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3864 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3865
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3866 # get cell type by fetching regfeats that contain this MotifFeature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3867 elsif($type eq 'MotifFeature') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3868 %cl = map {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3869 $_->feature_set->cell_type->name => $_->feature_type->name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3870 } @{$config->{'RegulatoryFeature_adaptor'}->fetch_all_by_attribute_feature($rf)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3871 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3872
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3873 $rf->{cell_types} = \%cl;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3874 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3875 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3876
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3877 push @{$rf_cache->{$chr}->{$type}},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3878 map { clean_reg_feat($_) }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3879 map { $_->transfer($slice) }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3880 @{$features};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3881 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3882 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3883 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3884
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3885 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3886
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3887 return $rf_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3888 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3889
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3890
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3891 # cleans reg feats for caching
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3892 sub clean_reg_feat {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3893 my $rf = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3894
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3895 foreach my $key(qw/adaptor binary_string bound_start bound_end attribute_cache feature_type feature_set analysis/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3896 delete $rf->{$key};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3897 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3898
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3899 if(defined($rf->{binding_matrix})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3900 $rf->{_variation_effect_feature_cache}->{seq} = $rf->seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3901
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3902 foreach my $key(qw/adaptor feature_type analysis dbID/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3903 delete $rf->{binding_matrix}->{$key};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3904 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3905 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3906
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3907 return $rf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3908 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3909
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3910
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3911 # dumps out reg feat cache to file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3912 sub dump_reg_feat_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3913 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3914 my $rf_cache = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3915 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3916 my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3917
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3918 debug("Dumping cached reg feat data for $chr:$region") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3919
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3920 # clean the slice adaptor before storing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3921 clean_slice_adaptor($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3922
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3923 $config->{reg}->disconnect_all;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3924 delete $config->{sa}->{dbc}->{_sql_helper};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3925
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3926 foreach my $chr(keys %{$rf_cache}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3927 foreach my $type(keys %{$rf_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3928 delete $_->{slice}->{coord_system}->{adaptor} for @{$rf_cache->{$chr}->{$type}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3929 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3930 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3931
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3932 my $dump_file = get_dump_file_name($config, $chr, $region, 'reg');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3933
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3934 debug("Writing to $dump_file") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3935
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3936 # storable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3937 open my $fh, "| gzip -9 -c > ".$dump_file or die "ERROR: Could not write to dump file $dump_file";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3938 nstore_fd($rf_cache, $fh);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3939 close $fh;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3940 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3941
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3942 #sub dump_reg_feat_cache_tabix {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3943 # my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3944 # my $rf_cache = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3945 # my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3946 # my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3947 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3948 # debug("Dumping cached reg feat data") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3949 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3950 # # clean the slice adaptor before storing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3951 # clean_slice_adaptor($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3952 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3953 # $config->{reg}->disconnect_all;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3954 # delete $config->{sa}->{dbc}->{_sql_helper};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3955 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3956 # $config->{reg}->disconnect_all;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3957 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3958 # my $dump_file = get_dump_file_name($config, $chr, $region, 'reg');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3959 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3960 # debug("Writing to $dump_file") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3961 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3962 # use Storable qw(nfreeze);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3963 # use MIME::Base64 qw(encode_base64);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3964 # open NEW, "| gzip -9 -c > ".$dump_file or die "ERROR: Could not write to dump file $dump_file";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3965 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3966 # foreach my $type(keys %{$rf_cache->{$chr}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3967 # foreach my $rf(sort {$a->start <=> $b->start} @{$rf_cache->{$chr}->{$type}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3968 # print NEW join "\t", (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3969 # $chr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3970 # $rf->start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3971 # $rf->end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3972 # $type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3973 # encode_base64(freeze($rf), "")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3974 # );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3975 # print NEW "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3976 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3977 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3978 # close NEW;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3979 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3980
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3981 # loads in dumped transcript cache to memory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3982 sub load_dumped_reg_feat_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3983 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3984 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3985 my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3986
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3987 my $dump_file = get_dump_file_name($config, $chr, $region, 'reg');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3988
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3989 return undef unless -e $dump_file;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3990
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3991 debug("Reading cached reg feat data for chromosome $chr".(defined $region ? "\:$region" : "")." from dumped file") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3992
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3993 open my $fh, $config->{compress}." ".$dump_file." |" or return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3994 my $rf_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3995 $rf_cache = fd_retrieve($fh);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3996 close $fh;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3997
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3998 return $rf_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3999 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4000
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4001
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4002
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4003 #sub load_dumped_reg_feat_cache_tabix {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4004 # my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4005 # my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4006 # my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4007 # my $trim_regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4008 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4009 # my $dump_file = get_dump_file_name($config, $chr, $region, 'reg');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4010 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4011 # #print STDERR "Reading from $dump_file\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4012 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4013 # return undef unless -e $dump_file;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4014 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4015 # debug("Reading cached reg feat data for chromosome $chr".(defined $region ? "\:$region" : "")." from dumped file") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4016 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4017 # my $rf_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4018 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4019 # use MIME::Base64 qw(decode_base64);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4020 # use Storable qw(thaw);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4021 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4022 # my ($s, $e) = split /\-/, $region;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4023 # my @regions = grep {overlap($s, $e, (split /\-/, $_))} @{$trim_regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4024 # my $regions = "";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4025 # $regions .= " $chr\:$_" for @regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4026 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4027 # #print STDERR "tabix $dump_file $regions |\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4028 # #open IN, "tabix $dump_file $regions |";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4029 # open IN, "gzip -dc $dump_file |";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4030 # while(<IN>) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4031 # my ($chr, $start, $end, $type, $blob) = split /\t/, $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4032 # next unless grep {overlap($start, $end, (split /\-/, $_))} @regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4033 # my $rf = thaw(decode_base64($blob));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4034 # push @{$rf_cache->{$chr}->{$type}}, $rf;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4035 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4036 # close IN;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4037 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4038 # $rf_cache->{$chr}->{$_} ||= [] for @REG_FEAT_TYPES;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4039 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4040 # return $rf_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4041 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4042
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4043
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4044 # get custom annotation for a region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4045 sub cache_custom_annotation {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4046 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4047 my $include_regions = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4048 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4049
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4050 #$include_regions = merge_regions($include_regions, $config, 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4051
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4052 my $annotation = {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4053
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4054 my $total = scalar @{$config->{custom}} * scalar @{$include_regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4055 my $counter = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4056
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4057 my $max_regions_per_tabix = 1000;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4058
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4059 debug("Caching custom annotations") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4060
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4061 foreach my $custom(@{$config->{custom}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4062
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4063 my @regions = @{$include_regions->{$chr}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4064
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4065 while(scalar @regions) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4066 my $got_features = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4067
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4068 my @tmp_regions = splice @regions, 0, $max_regions_per_tabix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4069
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4070 progress($config, $counter, $total);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4071 $counter += scalar @tmp_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4072
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4073 # some files may have e.g. chr10 instead of 10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4074 for my $tmp_chr($chr, 'chr'.$chr) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4075
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4076 # bigwig needs to use bigWigToWig utility
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4077 if($custom->{format} eq 'bigwig') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4078 foreach my $region(@tmp_regions) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4079 my ($s, $e) = split /\-/, $region;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4080 my $tmp_file = $config->{tmpdir}.'/vep_tmp_'.$$.'_'.$tmp_chr.'_'.$s.'_'.$e;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4081 my $bigwig_file = $custom->{file};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4082 my $bigwig_output = `bigWigToWig -chrom=$tmp_chr -start=$s -end=$e $bigwig_file $tmp_file 2>&1`;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4083
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4084 die "\nERROR: Problem using bigwig file $bigwig_file\n$bigwig_output" if $bigwig_output;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4085 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4086
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4087 # concatenate all the files together
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4088 my $string = $config->{tmpdir}.'/vep_tmp_'.$$.'_*';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4089 my $tmp_file = $config->{tmpdir}.'/vep_tmp_'.$$;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4090 `cat $string > $tmp_file; rm $string`;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4091 open CUSTOM, $tmp_file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4092 or die "\nERROR: Could not read from temporary WIG file $tmp_file\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4093 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4094
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4095 # otherwise use tabix
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4096 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4097 # tabix can fetch multiple regions, so construct a string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4098 my $region_string = join " ", map {$tmp_chr.':'.$_} @tmp_regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4099
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4100 open CUSTOM, "tabix ".$custom->{file}." $region_string 2>&1 |"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4101 or die "\nERROR: Could not open tabix pipe for ".$custom->{file}."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4102 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4104 # set an error flag so we don't have to check every line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4105 my $error_flag = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4106
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4107 while(<CUSTOM>) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4108 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4109
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4110 # check for errors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4111 if($error_flag) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4112 die "\nERROR: Problem using annotation file ".$custom->{file}."\n$_\n" if /invalid pointer|tabix|get_intv/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4113 $error_flag = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4114 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4116 my @data = split /\t/, $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4118 my $feature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4120 if($custom->{format} eq 'bed') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4121 $feature = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4122 chr => $chr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4123 start => $data[1],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4124 end => $data[2],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4125 name => $data[3],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4126 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4127 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4129 elsif($custom->{format} eq 'vcf') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4130 my $tmp_vf = parse_vcf($config, $_)->[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4132 $feature = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4133 chr => $chr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4134 start => $tmp_vf->{start},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4135 end => $tmp_vf->{end},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4136 name => $tmp_vf->{variation_name},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4137 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4138 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4140 elsif($custom->{format} eq 'gff' || $custom->{format} eq 'gtf') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4141
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4142 my $name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4143
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4144 # try and get a feature name from the attributes column
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4145 foreach my $attrib(split /\s*\;\s*/, $data[8]) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4146 my ($key, $value) = split /\=/, $attrib;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4147 $name = $value if $key eq 'ID';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4148 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4150 $name ||= $data[2]."_".$data[0].":".$data[3]."-".$data[4];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4152 $feature = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4153 chr => $chr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4154 start => $data[3],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4155 end => $data[4],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4156 name => $name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4157 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4158 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4160 elsif($custom->{format} eq 'bigwig') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4161 $feature = {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4162 chr => $chr,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4163 start => $data[0],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4164 end => $data[0],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4165 name => $data[1],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4166 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4167 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4169 if(defined($feature)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4170 $got_features = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4172 if(!defined($feature->{name}) || $custom->{coords}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4173 $feature->{name} = $feature->{chr}.":".$feature->{start}."-".$feature->{end};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4174 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4176 # add the feature to the cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4177 $annotation->{$chr}->{$custom->{name}}->{$feature->{start}}->{$feature->{name}} = $feature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4178 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4179 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4180 close CUSTOM;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4181
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4182 # unlink temporary wig files
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4183 unlink($config->{tmpdir}.'/vep_tmp_'.$$) if $custom->{format} eq 'bigwig';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4184
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4185 # no need to fetch e.g. "chr21" features if just "21" worked
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4186 last if $got_features;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4187 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4188 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4189 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4190
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4191 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4192
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4193 return $annotation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4194 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4195
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4196 # builds a full cache for this species
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4197 sub build_full_cache {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4198 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4200 my @slices;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4201
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4202 if($config->{build} =~ /all/i) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4203 @slices = @{$config->{sa}->fetch_all('toplevel')};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4204 push @slices, @{$config->{sa}->fetch_all('lrg', undef, 1, undef, 1)} if defined($config->{lrg});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4205 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4206 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4207 foreach my $val(split /\,/, $config->{build}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4208 my @nnn = split /\-/, $val;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4209
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4210 foreach my $chr($nnn[0]..$nnn[-1]) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4211 my $slice = get_slice($config, $chr);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4212 push @slices, $slice if defined($slice);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4213 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4214 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4215 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4216
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4217 foreach my $slice(@slices) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4218 my $chr = $slice->seq_region_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4219
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4220 # check for features, we don't want a load of effectively empty dirs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4221 my $dbc = $config->{sa}->db->dbc;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4222 my $sth = $dbc->prepare("SELECT COUNT(*) FROM transcript WHERE seq_region_id = ?");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4223 $sth->execute($slice->get_seq_region_id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4224
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4225 my $count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4226 $sth->bind_columns(\$count);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4227 $sth->fetch;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4228 $sth->finish;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4229
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4230 next unless $count > 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4231
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4232 my $regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4234 # for progress
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4235 my $region_count = int($slice->end / $config->{cache_region_size}) + 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4236 my $counter = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4238 # initial region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4239 my ($start, $end) = (1, $config->{cache_region_size});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4241 debug((defined($config->{rebuild}) ? "Rebuild" : "Creat")."ing cache for chromosome $chr") unless defined($config->{quiet});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4242
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4243 while($start < $slice->end) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4244
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4245 progress($config, $counter++, $region_count);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4247 # store quiet status
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4248 my $quiet = $config->{quiet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4249 $config->{quiet} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4250
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4251 # spoof regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4252 $regions->{$chr} = [$start.'-'.$end];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4253
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4254 # store transcripts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4255 my $tmp_cache = (defined($config->{rebuild}) ? load_dumped_transcript_cache($config, $chr, $start.'-'.$end) : cache_transcripts($config, $regions));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4256 $tmp_cache->{$chr} ||= [];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4257
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4258 #(defined($config->{tabix}) ? dump_transcript_cache_tabix($config, $tmp_cache, $chr, $start.'-'.$end) : dump_transcript_cache($config, $tmp_cache, $chr, $start.'-'.$end));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4259 dump_transcript_cache($config, $tmp_cache, $chr, $start.'-'.$end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4260 undef $tmp_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4261
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4262 # store reg feats
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4263 if(defined($config->{regulatory})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4264 my $rf_cache = cache_reg_feats($config, $regions);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4265 $rf_cache->{$chr} ||= {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4266
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4267 dump_reg_feat_cache($config, $rf_cache, $chr, $start.'-'.$end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4268 #(defined($config->{tabix}) ? dump_reg_feat_cache_tabix($config, $rf_cache, $chr, $start.'-'.$end) : dump_reg_feat_cache($config, $rf_cache, $chr, $start.'-'.$end));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4269 undef $rf_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4270
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4271 # this gets cleaned off but needs to be there for the next loop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4272 $slice->{coord_system}->{adaptor} = $config->{csa};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4273 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4275 # store variations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4276 my $variation_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4277 $variation_cache->{$chr} = get_variations_in_region($config, $chr, $start.'-'.$end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4278 $variation_cache->{$chr} ||= {};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4280 dump_variation_cache($config, $variation_cache, $chr, $start.'-'.$end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4281 undef $variation_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4282
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4283 # restore quiet status
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4284 $config->{quiet} = $quiet;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4285
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4286 # increment by cache_region_size to get next region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4287 $start += $config->{cache_region_size};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4288 $end += $config->{cache_region_size};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4289 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4290
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4291 end_progress($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4292
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4293 undef $regions;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4294 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4296 write_cache_info($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4297 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4298
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4299 # write an info file that defines what is in the cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4300 sub write_cache_info {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4301 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4303 my $info_file = $config->{dir}.'/info.txt';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4304
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4305 open OUT, ">>$info_file" or die "ERROR: Could not write to cache info file $info_file\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4307 print OUT "# CACHE UPDATED ".get_time()."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4309 foreach my $param(qw(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4310 host
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4311 port
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4312 user
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4313 build
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4314 regulatory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4315 sift
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4316 polyphen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4317 )) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4318 print OUT "$param\t".(defined $config->{$param} ? $config->{$param} : '-')."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4319 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4320
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4321 # cell types
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4322 if(defined($config->{cell_type}) && scalar(@{$config->{cell_type}})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4323 my $cta = $config->{RegulatoryFeature_adaptor}->db->get_CellTypeAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4324 print OUT "cell_types\t".(join ",", map {$_->name} @{$cta->fetch_all});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4325 print OUT "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4326 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4327
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4328 close OUT;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4329 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4330
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4331 # reads in cache info file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4332 sub read_cache_info {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4333 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4334
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4335 my $info_file = $config->{dir}.'/info.txt';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4336
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4337 open IN, $info_file or return 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4338
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4339 while(<IN>) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4340 next if /^#/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4341 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4342 my ($param, $value) = split /\t/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4343 $config->{'cache_'.$param} = $value unless defined $value && $value eq '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4344 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4346 close IN;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4347
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4348 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4349 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4350
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4351 # format coords for printing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4352 sub format_coords {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4353 my ($start, $end) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4355 if(defined($start)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4356 if(defined($end)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4357 if($start > $end) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4358 return $end.'-'.$start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4359 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4360 elsif($start == $end) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4361 return $start;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4362 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4363 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4364 return $start.'-'.$end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4365 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4366 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4367 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4368 return $start.'-?';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4369 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4370 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4371 elsif(defined($end)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4372 return '?-'.$end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4373 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4374 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4375 return '-';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4376 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4377 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4378
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4379
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4380
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4381
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4382 # METHODS TO FIND CO-LOCATED / EXISTING VARIATIONS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4383 ##################################################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4384
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4385 # finds an existing VF in the db
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4386 sub find_existing {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4387 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4388 my $new_vf = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4389
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4390 if(defined($config->{vfa}->db)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4391
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4392 my $maf_cols = have_maf_cols($config) ? 'vf.minor_allele, vf.minor_allele_freq' : 'NULL, NULL';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4393
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4394 my $sth = $config->{vfa}->db->dbc->prepare(qq{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4395 SELECT variation_name, IF(fv.variation_id IS NULL, 0, 1), seq_region_start, seq_region_end, allele_string, seq_region_strand, $maf_cols
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4396 FROM variation_feature vf LEFT JOIN failed_variation fv
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4397 ON vf.variation_id = fv.variation_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4398 WHERE vf.seq_region_id = ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4399 AND vf.seq_region_start = ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4400 AND vf.seq_region_end = ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4401 ORDER BY vf.source_id ASC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4402 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4403
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4404 $sth->execute($new_vf->slice->get_seq_region_id, $new_vf->start, $new_vf->end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4405
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4406 my @v;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4407 for my $i(0..7) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4408 $v[$i] = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4409 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4410
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4411 $sth->bind_columns(\$v[0], \$v[1], \$v[2], \$v[3], \$v[4], \$v[5], \$v[6], \$v[7]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4412
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4413 my @found;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4415 while($sth->fetch) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4416 push @found, $v[0] unless is_var_novel($config, \@v, $new_vf) || $v[1] > $config->{failed};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4417 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4418
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4419 $sth->finish();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4420
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4421 return (scalar @found ? join ",", @found : undef);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4422 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4423
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4424 return undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4425 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4426
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4427 # compare a new vf to one from the cache / DB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4428 sub is_var_novel {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4429 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4430 my $existing_var = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4431 my $new_var = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4432
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4433 my $is_novel = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4434
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4435 $is_novel = 0 if $existing_var->[2] == $new_var->start && $existing_var->[3] == $new_var->end;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4436
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4437 if(defined($config->{check_alleles})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4438 my %existing_alleles;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4439
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4440 $existing_alleles{$_} = 1 for split /\//, $existing_var->[4];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4441
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4442 my $seen_new = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4443 foreach my $a(split /\//, ($new_var->allele_string || "")) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4444 reverse_comp(\$a) if $new_var->strand ne $existing_var->[5];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4445 $seen_new = 1 unless defined $existing_alleles{$a};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4446 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4447
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4448 $is_novel = 1 if $seen_new;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4449 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4450
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4451 return $is_novel;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4452 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4453
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4454 # check frequencies of existing var against requested params
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4455 sub check_frequencies {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4456 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4457 my $var_name = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4458
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4459 my $v = $config->{va}->fetch_by_name($var_name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4460
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4461 my $pass = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4462
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4463 my $freq_pop = $config->{freq_pop};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4464 my $freq_freq = $config->{freq_freq};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4465 my $freq_gt_lt = $config->{freq_gt_lt};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4466
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4467 my $freq_pop_name = (split /\_/, $freq_pop)[-1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4468 $freq_pop_name = undef if $freq_pop_name =~ /1kg|hap/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4469
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4470 delete $config->{filtered_freqs};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4471
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4472 foreach my $a(@{$v->get_all_Alleles}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4473 next unless defined $a->{population} || defined $a->{'_population_id'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4474 next unless defined $a->frequency;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4475 next if $a->frequency > 0.5;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4476
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4477 my $pop_name = $a->population->name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4478
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4479 if($freq_pop =~ /1kg/) { next unless $pop_name =~ /^1000.+(low|phase).+/i; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4480 if($freq_pop =~ /hap/) { next unless $pop_name =~ /^CSHL-HAPMAP/i; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4481 if($freq_pop =~ /any/) { next unless $pop_name =~ /^(CSHL-HAPMAP)|(1000.+(low|phase).+)/i; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4482 if(defined $freq_pop_name) { next unless $pop_name =~ /$freq_pop_name/i; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4483
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4484 $pass = 1 if $a->frequency >= $freq_freq and $freq_gt_lt eq 'gt';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4485 $pass = 1 if $a->frequency <= $freq_freq and $freq_gt_lt eq 'lt';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4486
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4487 $pop_name =~ s/\:/\_/g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4488 push @{$config->{filtered_freqs}}, $pop_name.':'.$a->frequency;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4489
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4490 #warn "Comparing allele ", $a->allele, " ", $a->frequency, " for $var_name in population ", $a->population->name, " PASS $pass";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4491 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4492
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4493 return 0 if $config->{freq_filter} eq 'exclude' and $pass == 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4494 return 0 if $config->{freq_filter} eq 'include' and $pass == 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4495 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4496 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4498 # gets all variations in a region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4499 sub get_variations_in_region {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4500 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4501 my $chr = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4502 my $region = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4503
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4504 my ($start, $end) = split /\-/, $region;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4505
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4506 my %variations;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4507
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4508 if(defined($config->{vfa}->db)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4509 my $sr_cache = $config->{seq_region_cache};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4510
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4511 if(!defined($sr_cache)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4512 $sr_cache = cache_seq_region_ids($config);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4513 $config->{seq_region_cache} = $sr_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4514 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4515
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4516 # no seq_region_id?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4517 return {} unless defined($sr_cache) && defined($sr_cache->{$chr});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4518
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4519 my $maf_cols = have_maf_cols($config) ? 'vf.minor_allele, vf.minor_allele_freq' : 'NULL, NULL';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4520
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4521 my $sth = $config->{vfa}->db->dbc->prepare(qq{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4522 SELECT vf.variation_name, IF(fv.variation_id IS NULL, 0, 1), vf.seq_region_start, vf.seq_region_end, vf.allele_string, vf.seq_region_strand, $maf_cols
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4523 FROM variation_feature vf
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4524 LEFT JOIN failed_variation fv ON fv.variation_id = vf.variation_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4525 WHERE vf.seq_region_id = ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4526 AND vf.seq_region_start >= ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4527 AND vf.seq_region_start <= ?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4528 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4529
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4530 $sth->execute($sr_cache->{$chr}, $start, $end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4531
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4532 my @v;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4533 for my $i(0..7) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4534 $v[$i] = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4535 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4536
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4537 $sth->bind_columns(\$v[0], \$v[1], \$v[2], \$v[3], \$v[4], \$v[5], \$v[6], \$v[7]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4538
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4539 while($sth->fetch) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4540 my @v_copy = @v;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4541 push @{$variations{$v[2]}}, \@v_copy;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4542 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4544 $sth->finish();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4545 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4546
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4547 return \%variations;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4548 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4549
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4550 sub cache_seq_region_ids {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4551 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4552
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4553 my (%cache, $chr, $id);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4554
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4555 my $sth = $config->{vfa}->db->dbc->prepare(qq{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4556 SELECT seq_region_id, name FROM seq_region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4557 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4558
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4559 $sth->execute();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4560 $sth->bind_columns(\$id, \$chr);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4561 $cache{$chr} = $id while $sth->fetch();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4562 $sth->finish;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4563
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4564 return \%cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4565 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4566
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4567 sub have_maf_cols {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4568 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4569
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4570 if(!defined($config->{have_maf_cols})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4571 my $sth = $config->{vfa}->db->dbc->prepare(qq{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4572 DESCRIBE variation_feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4573 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4574
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4575 $sth->execute();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4576 my @cols = map {$_->[0]} @{$sth->fetchall_arrayref};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4577 $sth->finish();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4578
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4579 $config->{have_maf_cols} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4580 $config->{have_maf_cols} = 1 if grep {$_ eq 'minor_allele'} @cols;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4581 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4582
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4583 return $config->{have_maf_cols};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4584 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4585
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4586 sub merge_hashes {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4587 my ($x, $y) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4588
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4589 foreach my $k (keys %$y) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4590 if (!defined($x->{$k})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4591 $x->{$k} = $y->{$k};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4592 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4593 if(ref($x->{$k}) eq 'ARRAY') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4594 $x->{$k} = merge_arrays($x->{$k}, $y->{$k});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4595 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4596 elsif(ref($x->{$k}) eq 'HASH') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4597 $x->{$k} = merge_hashes($x->{$k}, $y->{$k});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4598 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4599 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4600 $x->{$k} = $y->{$k};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4601 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4602 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4603 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4604 return $x;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4605 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4606
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4607 sub merge_arrays {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4608 my ($x, $y) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4609
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4610 my %tmp = map {$_ => 1} (@$x, @$y);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4611
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4612 return [keys %tmp];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4613 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4614
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4615
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4616
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4617
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4618 # DEBUG AND STATUS METHODS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4619 ##########################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4620
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4621 # gets time
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4622 sub get_time() {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4623 my @time = localtime(time());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4624
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4625 # increment the month (Jan = 0)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4626 $time[4]++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4628 # add leading zeroes as required
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4629 for my $i(0..4) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4630 $time[$i] = "0".$time[$i] if $time[$i] < 10;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4631 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4632
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4633 # put the components together in a string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4634 my $time =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4635 ($time[5] + 1900)."-".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4636 $time[4]."-".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4637 $time[3]." ".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4638 $time[2].":".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4639 $time[1].":".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4640 $time[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4641
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4642 return $time;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4643 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4644
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4645 # prints debug output with time
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4646 sub debug {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4647 my $text = (@_ ? (join "", @_) : "No message");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4648 my $time = get_time;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4649
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4650 print $time." - ".$text.($text =~ /\n$/ ? "" : "\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4651 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4652
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4653 # finds out memory usage
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4654 sub memory {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4655 my @mem;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4656
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4657 open IN, "ps -o rss,vsz $$ |";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4658 while(<IN>) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4659 next if $_ =~ /rss/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4660 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4661 @mem = split;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4662 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4663 close IN;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4664
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4665 return \@mem;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4666 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4667
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4668 sub mem_diff {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4669 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4670 my $mem = memory();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4671 my @diffs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4672
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4673 if(defined($config->{memory})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4674 for my $i(0..(scalar @{$config->{memory}} - 1)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4675 push @diffs, $mem->[$i] - $config->{memory}->[$i];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4676 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4677 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4678 else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4679 @diffs = @$mem;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4680 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4681
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4682 $config->{memory} = $mem;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4683
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4684 return \@diffs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4685 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4686
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4687 # update or initiate progress bar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4688 sub progress {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4689 my ($config, $i, $total) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4690
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4691 return if defined($config->{quiet}) || defined($config->{no_progress});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4692
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4693 my $width = $config->{terminal_width} || 60;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4694 my $percent = int(($i/$total) * 100);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4695 my $numblobs = int((($i/$total) * $width) - 2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4696
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4697 # this ensures we're not writing to the terminal too much
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4698 return if(defined($config->{prev_prog})) && $numblobs.'-'.$percent eq $config->{prev_prog};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4699 $config->{prev_prog} = $numblobs.'-'.$percent;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4700
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4701 printf("\r% -${width}s% 1s% 10s", '['.('=' x $numblobs).($numblobs == $width - 2 ? '=' : '>'), ']', "[ " . $percent . "% ]");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4702 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4703
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4704 # end progress bar
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4705 sub end_progress {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4706 my $config = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4707 return if defined($config->{quiet}) || defined($config->{no_progress});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4708 progress($config, 1,1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4709 print "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4710 delete $config->{prev_prog};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4711 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4712
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4713 1;