0
|
1 package Bio::EnsEMBL::Variation::Utils::Config;
|
|
2
|
|
3 use base qw(Exporter);
|
|
4
|
|
# Names offered through Exporter's on-request list (@EXPORT_OK).
our @EXPORT_OK = (
    '@ATTRIB_TYPES',
    '%ATTRIBS',
    '@ATTRIB_SETS',
    '@VARIATION_CLASSES',
    '@OVERLAP_CONSEQUENCES',
    '@FEATURE_TYPES',
    '$OVERLAP_CONSEQUENCE_CLASS',
    '$MAX_ATTRIB_CODE_LENGTH',
);

# Package name, kept as a plain string so importers can refer to the class
# without hard-coding it.
our $OVERLAP_CONSEQUENCE_CLASS = 'Bio::EnsEMBL::Variation::OverlapConsequence';

# NOTE(review): presumably the maximum permitted length of an attrib code;
# confirm against the code that consumes this value.
our $MAX_ATTRIB_CODE_LENGTH = 20;
|
|
19
|
|
# Flat list of short-name codes, one family per row for readability.
# NOTE(review): these look like variation-set short names; confirm against
# the code that consumes @short_names.
#
# The 1kg "_com" series previously started with a second copy of 1kg_afr,
# which left a duplicate in the list and no 1kg_afr_com code; the entry is
# now 1kg_afr_com so the series matches amr/asn/eur.
our @short_names = qw(
    1kg_hct 1kg_hct_ceu 1kg_hct_yri
    1kg_hce 1kg_hce_ceu 1kg_hce_chb 1kg_hce_chd 1kg_hce_jpt 1kg_hce_lwk 1kg_hce_tsi 1kg_hce_yri
    1kg_lc 1kg_lc_ceu 1kg_lc_chb_jpt 1kg_lc_yri
    hapmap
    1kg 1kg_afr 1kg_amr 1kg_asn 1kg_eur
    1kg_afr_com 1kg_amr_com 1kg_asn_com 1kg_eur_com
    ind_venter ind_watson ind_gill ind_ak1 ind_irish ind_angrist
    ind_gates_jr ind_gates_sr ind_kriek ind_quake ind_saqqaq ind_saqqaq_hc ind_sjk ind_yh
    fail_all fail_nonref fail_ambig fail_gt_fq fail_incons_map fail_mult_map
    fail_no_alleles fail_no_gt fail_no_map fail_no_seq fail_non_nt fail_mult_alleles fail_dbsnp_suspect
    ph_hgmd_pub ph_johnson_et_al ph_nhgri ph_omim ph_variants ph_uniprot
    ph_cosmic ph_ega
    precious
    hapmap_ceu hapmap_hcb hapmap_jpt hapmap_yri
    Affy_500K Affy_SNP6 Cardio-Metabo_Chip HumanOmni1-Quad Illumina_1M-duo Illumina_660Q
);
|
|
32
|
|
# Clinical significance terms for dbSNP, in their original order.
our @dbsnp_clinical_significance_types = (
    'unknown',
    'untested',
    'non-pathogenic',
    'probable-non-pathogenic',
    'probable-pathogenic',
    'pathogenic',
    'drug-response',
    'histocompatibility',
    'other',
);
|
|
44
|
|
# Clinical significance terms for DGVa, in their original order.
# These values contain spaces and colons, so they are quoted individually
# rather than listed with qw().
our @dgva_clinical_significance_types = (
    "Not tested",
    "Benign",
    "Pathogenic",
    "Uncertain Significance",
    "Uncertain Significance: likely benign",
    "Uncertain Significance: likely pathogenic",
);
|
|
53
|
|
# Table of variation classes. Every entry is a hashref carrying SO_accession
# and SO_term; display_term and somatic_display_term are present only on the
# entries that define them.
our @VARIATION_CLASSES = (
    {
        SO_accession         => 'SO:0001483',
        SO_term              => 'SNV',
        display_term         => 'SNP',
        somatic_display_term => 'somatic_SNV',
    },
    { SO_accession => 'SO:1000002', SO_term => 'substitution' },
    {
        SO_accession => 'SO:0001019',
        SO_term      => 'copy_number_variation',
        display_term => 'CNV',
    },
    { SO_accession => 'SO:0000667', SO_term => 'insertion' },
    { SO_accession => 'SO:0000159', SO_term => 'deletion' },
    { SO_accession => 'SO:1000032', SO_term => 'indel' },
    { SO_accession => 'SO:0000705', SO_term => 'tandem_repeat' },
    { SO_accession => 'SO:0001059', SO_term => 'sequence_alteration' },

    # Structural variation classes
    {
        SO_accession => 'SO:0001537',
        SO_term      => 'structural_variant',
        display_term => 'SV',
    },
    {
        SO_accession => 'SO:0000051',
        SO_term      => 'probe',
        display_term => 'CNV_PROBE',
    },
    {
        SO_accession => 'SO:0001742',
        SO_term      => 'copy_number_gain',
        display_term => 'Gain',
    },
    {
        SO_accession => 'SO:0001743',
        SO_term      => 'copy_number_loss',
        display_term => 'Loss',
    },
    { SO_accession => 'SO:1000036', SO_term => 'inversion' },
    {
        SO_accession => 'SO:0001784',
        SO_term      => 'complex_structural_alteration',
        display_term => 'Complex',
    },
    {
        SO_accession => 'SO:1000173',
        SO_term      => 'tandem_duplication',
        display_term => 'Tandem duplication',
    },
    {
        SO_accession => 'SO:0001837',
        SO_term      => 'mobile_element_insertion',
        display_term => 'Mobile element insertion',
    },
    {
        SO_accession => 'SO:0001873',
        SO_term      => 'interchromosomal_breakpoint',
        display_term => 'Interchromosomal breakpoint',
    },
    {
        SO_accession => 'SO:0001874',
        SO_term      => 'intrachromosomal_breakpoint',
        display_term => 'Intrachromosomal breakpoint',
    },
    { SO_accession => 'SO:0000199', SO_term => 'translocation' },
    {
        SO_accession => 'SO:1000035',
        SO_term      => 'duplication',
        display_term => 'Duplication',
    },
);
|
|
150
|
|
# Table of overlap consequences. Each entry is a hashref; the keys used below
# are:
#   SO_accession, SO_term     - Sequence Ontology identifier and term
#   display_term, NCBI_term   - alternative terms (only on some entries)
#   feature_SO_term           - SO term of the overlapped feature type
#   feature_class             - class name of the overlapped feature
#   variant_feature_class     - class name of the variant feature
#   rank, tier                - numeric strings
#   predicate                 - fully qualified name of a sub in
#                               Bio::EnsEMBL::Variation::Utils::VariationEffect
#   description, label        - long definition and short human-readable name
#   is_default                - set on exactly one entry (intergenic_variant)
#
# Commented-out entries are retained for reference.
#
# NOTE(review): some rank values are shared between entries (e.g. '3', '15',
# '36'); this is left as found - confirm whether ties are intended.
our @OVERLAP_CONSEQUENCES = (
    {
        SO_accession => 'SO:0001628',
        SO_term      => 'intergenic_variant',
        display_term => 'INTERGENIC',
        rank         => '38',
        tier         => '4',
        description  => 'A sequence variant located in the intergenic region, between genes',
        label        => 'Intergenic variant',
        is_default   => 1,
    },
    {
        SO_accession          => 'SO:0001631',
        SO_term               => 'upstream_gene_variant',
        display_term          => 'UPSTREAM',
        feature_SO_term       => 'transcript',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '24',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::upstream',
        description           => 'A sequence variant located 5\' of a gene',
        label                 => 'Upstream gene variant',
    },
    {
        SO_accession          => 'SO:0001632',
        SO_term               => 'downstream_gene_variant',
        display_term          => 'DOWNSTREAM',
        feature_SO_term       => 'transcript',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '25',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::downstream',
        description           => 'A sequence variant located 3\' of a gene',
        label                 => 'Downstream gene variant',
    },
    {
        SO_accession          => 'SO:0001575',
        SO_term               => 'splice_donor_variant',
        display_term          => 'ESSENTIAL_SPLICE_SITE',
        NCBI_term             => 'splice-5',
        feature_SO_term       => 'primary_transcript',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::VariationFeature',
        rank                  => '3',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::donor_splice_site',
        description           => 'A splice variant that changes the 2 base region at the 5\' end of an intron',
        label                 => 'Splice donor variant',
    },
    {
        SO_accession          => 'SO:0001574',
        SO_term               => 'splice_acceptor_variant',
        display_term          => 'ESSENTIAL_SPLICE_SITE',
        NCBI_term             => 'splice-3',
        feature_SO_term       => 'primary_transcript',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::VariationFeature',
        rank                  => '3',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::acceptor_splice_site',
        description           => 'A splice variant that changes the 2 base region at the 3\' end of an intron',
        label                 => 'Splice acceptor variant',
    },
    {
        SO_accession          => 'SO:0001630',
        SO_term               => 'splice_region_variant',
        display_term          => 'SPLICE_SITE',
        feature_SO_term       => 'primary_transcript',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::VariationFeature',
        rank                  => '13',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::splice_region',
        description           => 'A sequence variant in which a change has occurred within the region of the splice site, either within 1-3 bases of the exon or 3-8 bases of the intron',
        label                 => 'Splice region variant',
    },
    {
        SO_accession          => 'SO:0001627',
        SO_term               => 'intron_variant',
        display_term          => 'INTRONIC',
        NCBI_term             => 'intron',
        feature_SO_term       => 'primary_transcript',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '20',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::within_intron',
        description           => 'A transcript variant occurring within an intron',
        label                 => 'Intron variant',
    },
    {
        SO_accession          => 'SO:0001623',
        SO_term               => '5_prime_UTR_variant',
        display_term          => '5PRIME_UTR',
        NCBI_term             => 'untranslated_5',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '18',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::within_5_prime_utr',
        description           => 'A UTR variant of the 5\' UTR',
        label                 => '5 prime UTR variant',
    },
    {
        SO_accession          => 'SO:0001624',
        SO_term               => '3_prime_UTR_variant',
        display_term          => '3PRIME_UTR',
        NCBI_term             => 'untranslated_3',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '19',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::within_3_prime_utr',
        description           => 'A UTR variant of the 3\' UTR',
        label                 => '3 prime UTR variant',
    },
#    {
#        SO_accession => 'SO:0001577',
#        SO_term => 'complex_change_in_transcript',
#        display_term => 'COMPLEX_INDEL',
#        feature_SO_term => 'primary_transcript',
#        feature_class => 'Bio::EnsEMBL::Transcript',
#        variant_feature_class => 'Bio::EnsEMBL::Variation::VariationFeature',
#        rank => '4',
#        tier => '3',
#        predicate => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::complex_indel',
#        description => 'Insertion or deletion that spans an exon/intron or coding sequence/UTR border',
#        label => 'Complex change in transcript',
#    },
    {
        SO_accession          => 'SO:0001819',
        SO_term               => 'synonymous_variant',
        display_term          => 'SYNONYMOUS_CODING',
        NCBI_term             => 'cds-synon',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::VariationFeature',
        rank                  => '15',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::synonymous_variant',
        description           => 'A sequence variant where there is no resulting change to the encoded amino acid',
        label                 => 'Synonymous variant',
    },
    {
        SO_accession          => 'SO:0001583',
        SO_term               => 'missense_variant',
        display_term          => 'NON_SYNONYMOUS_CODING',
        NCBI_term             => 'missense',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::VariationFeature',
        rank                  => '12',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::missense_variant',
        description           => 'A sequence variant, where the change may be longer than 3 bases, and at least one base of a codon is changed resulting in a codon that encodes for a different amino acid',
        label                 => 'Missense variant',
    },
    {
        SO_accession          => 'SO:0001821',
        SO_term               => 'inframe_insertion',
        display_term          => 'NON_SYNONYMOUS_CODING',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '10',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::inframe_insertion',
        description           => 'An inframe non synonymous variant that inserts bases into in the coding sequence',
        label                 => 'Inframe insertion',
    },
    {
        SO_accession          => 'SO:0001822',
        SO_term               => 'inframe_deletion',
        display_term          => 'NON_SYNONYMOUS_CODING',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '11',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::inframe_deletion',
        description           => 'An inframe non synonymous variant that deletes bases from the coding sequence',
        label                 => 'Inframe deletion',
    },
    {
        SO_accession          => 'SO:0001587',
        SO_term               => 'stop_gained',
        display_term          => 'STOP_GAINED',
        NCBI_term             => 'nonsense',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::VariationFeature',
        rank                  => '4',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::stop_gained',
        description           => 'A sequence variant whereby at least one base of a codon is changed, resulting in a premature stop codon, leading to a shortened transcript',
        label                 => 'Stop gained',
    },
    {
        SO_accession          => 'SO:0001578',
        SO_term               => 'stop_lost',
        display_term          => 'STOP_LOST',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '6',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::stop_lost',
        description           => 'A sequence variant where at least one base of the terminator codon (stop) is changed, resulting in an elongated transcript',
        label                 => 'Stop lost',
    },
    {
        SO_accession          => 'SO:0001567',
        SO_term               => 'stop_retained_variant',
        display_term          => 'SYNONYMOUS_CODING',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::VariationFeature',
        rank                  => '15',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::stop_retained',
        description           => 'A sequence variant where at least one base in the terminator codon is changed, but the terminator remains',
        label                 => 'Stop retained variant',
    },
    {
        SO_accession          => 'SO:0001582',
        SO_term               => 'initiator_codon_variant',
        display_term          => 'NON_SYNONYMOUS_CODING',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '7',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::affects_start_codon',
        description           => 'A codon variant that changes at least one base of the first codon of a transcript',
        label                 => 'Initiator codon variant',
    },
    {
        SO_accession          => 'SO:0001589',
        SO_term               => 'frameshift_variant',
        display_term          => 'FRAMESHIFT_CODING',
        NCBI_term             => 'frameshift',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '5',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::frameshift',
        description           => 'A sequence variant which causes a disruption of the translational reading frame, because the number of nucleotides inserted or deleted is not a multiple of three',
        label                 => 'Frameshift variant',
    },
    {
        SO_accession          => 'SO:0001626',
        SO_term               => 'incomplete_terminal_codon_variant',
        display_term          => 'PARTIAL_CODON',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::VariationFeature',
        rank                  => '14',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::partial_codon',
        description           => 'A sequence variant where at least one base of the final codon of an incompletely annotated transcript is changed',
        label                 => 'Incomplete terminal codon variant',
    },
    {
        SO_accession          => 'SO:0001621',
        SO_term               => 'NMD_transcript_variant',
        display_term          => 'NMD_TRANSCRIPT',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '21',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::within_nmd_transcript',
        description           => 'A variant in a transcript that is the target of NMD',
        label                 => 'NMD transcript variant',
    },
    {
        SO_accession          => 'SO:0001619',
        SO_term               => 'nc_transcript_variant',
        display_term          => 'WITHIN_NON_CODING_GENE',
        feature_SO_term       => 'ncRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '23',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::within_non_coding_gene',
        description           => 'A transcript variant of a non coding RNA',
        label                 => 'NC transcript variant',
    },
    {
        SO_accession          => 'SO:0001792',
        SO_term               => 'non_coding_exon_variant',
        display_term          => 'WITHIN_NON_CODING_GENE',
        feature_SO_term       => 'ncRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '22',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::non_coding_exon_variant',
        description           => 'A sequence variant that changes non-coding exon sequence',
        label                 => 'Non coding exon variant',
    },
    {
        SO_accession          => 'SO:0001620',
        SO_term               => 'mature_miRNA_variant',
        display_term          => 'WITHIN_MATURE_miRNA',
        feature_SO_term       => 'miRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '17',
        tier                  => '2',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::within_mature_miRNA',
        description           => 'A transcript variant located with the sequence of the mature miRNA',
        label                 => 'Mature miRNA variant',
    },
    {
        SO_accession          => 'SO:0001580',
        SO_term               => 'coding_sequence_variant',
        display_term          => 'CODING_UNKNOWN',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '16',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::coding_unknown',
        description           => 'A sequence variant that changes the coding sequence',
        label                 => 'Coding sequence variant',
    },
    {
        SO_accession          => 'SO:0001566',
        SO_term               => 'regulatory_region_variant',
        display_term          => 'REGULATORY_REGION',
        feature_SO_term       => 'regulatory_region',
        feature_class         => 'Bio::EnsEMBL::Funcgen::RegulatoryFeature',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '36',
        tier                  => '2',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::within_regulatory_feature',
        description           => 'A sequence variant located within a regulatory region',
        label                 => 'Regulatory region variant',
    },
#    {
#        SO_accession => 'SO:X000005',
#        SO_term => 'pre_miRNA_variant',
#        display_term => 'WITHIN_NON_CODING_GENE',
#        feature_SO_term => 'miRNA',
#        feature_class => 'Bio::EnsEMBL::Transcript',
#        rank => '13',
#        tier => '2',
#        predicate => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::within_miRNA',
#    },
#    {
#        SO_accession => 'SO:X000004',
#        SO_term => 'miRNA_target_site_variant',
#        display_term => 'REGULATORY_REGION',
#        feature_SO_term => 'binding_site',
#        feature_class => 'Bio::EnsEMBL::Funcgen::ExternalFeature',
#        rank => '13',
#        tier => '2',
#        predicate => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::within_miRNA_target_site',
#        description => 'In regulatory region annotated by Ensembl',
#        label => 'Regulatory region',
#    },
    {
        SO_accession          => 'SO:0001782',
        SO_term               => 'TF_binding_site_variant',
        display_term          => 'REGULATORY_REGION',
        feature_SO_term       => 'TF_binding_site',
        feature_class         => 'Bio::EnsEMBL::Funcgen::MotifFeature',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '30',
        tier                  => '2',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::within_motif_feature',
        # The definition sentence used to sit in 'label' while 'description'
        # held text left over from the miRNA target site entry; each value is
        # now under the key it belongs to, matching every other entry.
        description           => 'A sequence variant located within a transcription factor binding site',
        label                 => 'TF binding site variant',
    },

#    {
#        SO_accession => 'SO:X000002',
#        SO_term => 'decreased_binding_affinity',
#        display_term => 'REGULATORY_REGION',
#        feature_SO_term => 'binding_site',
#        feature_class => 'Bio::EnsEMBL::Funcgen::MotifFeature',
#        rank => '47',
#        tier => '2',
#        predicate => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::decreased_binding_affinity',
#    },
#    {
#        SO_accession => 'SO:X000001',
#        SO_term => 'increased_binding_affinity',
#        display_term => 'REGULATORY_REGION',
#        feature_SO_term => 'binding_site',
#        feature_class => 'Bio::EnsEMBL::Funcgen::MotifFeature',
#        rank => '48',
#        tier => '2',
#        predicate => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::increased_binding_affinity',
#    },


    ## NEW FOR 68
    #############

    {
        SO_accession          => 'SO:0001893',
        SO_term               => 'transcript_ablation',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '1',
        tier                  => '1',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::feature_ablation',
        description           => 'A feature ablation whereby the deleted region includes a transcript feature',
        label                 => 'Transcript ablation',
    },
#    {
#        SO_accession => 'SO:0001886',
#        SO_term => 'transcript_fusion',
#        feature_SO_term => 'mRNA',
#        feature_class => 'Bio::EnsEMBL::Transcript',
#        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
#        rank => '2',
#        tier => '2',
#        predicate => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::transcript_fusion',
#        description => 'A feature fusion where the deletion brings together transcript regions',
#        label => 'Transcript fusion',
#    },
    {
        SO_accession          => 'SO:0001889',
        SO_term               => 'transcript_amplification',
        feature_SO_term       => 'mRNA',
        feature_class         => 'Bio::EnsEMBL::Transcript',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '8',
        tier                  => '1',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::feature_amplification',
        description           => 'A feature amplification of a region containing a transcript',
        label                 => 'Transcript amplification',
    },
#    {
#        SO_accession => 'SO:0001883',
#        SO_term => 'transcript_translocation',
#        feature_SO_term => 'mRNA',
#        feature_class => 'Bio::EnsEMBL::Transcript',
#        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
#        rank => '9',
#        tier => '2',
#        predicate => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::transcript_translocation',
#        description => 'A feature translocation where the region contains a transcript',
#        label => 'Transcript translocation',
#    },
    {
        SO_accession          => 'SO:0001895',
        SO_term               => 'TFBS_ablation',
        feature_SO_term       => 'TF_binding_site',
        feature_class         => 'Bio::EnsEMBL::Funcgen::MotifFeature',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '26',
        tier                  => '2',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::feature_ablation',
        description           => 'A feature ablation whereby the deleted region includes a transcription factor binding site',
        label                 => 'TFBS ablation',
    },
#    {
#        SO_accession => 'SO:0001888',
#        SO_term => 'TFBS_fusion',
#        feature_SO_term => 'TF_binding_site',
#        feature_class => 'Bio::EnsEMBL::Funcgen::MotifFeature',
#        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
#        rank => '27',
#        tier => '2',
#        predicate => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::motif_feature_fusion',
#        description => 'A fusion where the deletion brings together transcription factor binding sites',
#        label => 'TFBS fusion',
#    },
    {
        SO_accession          => 'SO:0001892',
        SO_term               => 'TFBS_amplification',
        feature_SO_term       => 'TF_binding_site',
        feature_class         => 'Bio::EnsEMBL::Funcgen::MotifFeature',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '28',
        tier                  => '2',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::feature_amplification',
        description           => 'A feature amplification of a region containing a transcription factor binding site',
        label                 => 'TFBS amplification',
    },
#    {
#        SO_accession => 'SO:0001885',
#        SO_term => 'TFBS_translocation',
#        feature_SO_term => 'TF_binding_site',
#        feature_class => 'Bio::EnsEMBL::Funcgen::MotifFeature',
#        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
#        rank => '29',
#        tier => '2',
#        predicate => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::motif_feature_translocation',
#        description => 'A feature translocation where the region contains a transcription factor binding site',
#        label => 'TFBS translocation',
#    },
    {
        SO_accession          => 'SO:0001894',
        SO_term               => 'regulatory_region_ablation',
        # NOTE(review): feature_SO_term is 'TF_binding_site' although
        # feature_class is RegulatoryFeature; left as found - confirm.
        feature_SO_term       => 'TF_binding_site',
        feature_class         => 'Bio::EnsEMBL::Funcgen::RegulatoryFeature',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '31',
        tier                  => '2',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::feature_ablation',
        description           => 'A feature ablation whereby the deleted region includes a regulatory region',
        label                 => 'Regulatory region ablation',
    },
#    {
#        SO_accession => 'SO:0001887',
#        SO_term => 'regulatory_region_fusion',
#        feature_SO_term => 'TF_binding_site',
#        feature_class => 'Bio::EnsEMBL::Funcgen::RegulatoryFeature',
#        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
#        rank => '32',
#        tier => '2',
#        predicate => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::regulatory_feature_fusion',
#        description => 'A fusion where the deletion brings together regulatory regions',
#        label => 'Regulatory region fusion',
#    },
    {
        SO_accession          => 'SO:0001891',
        SO_term               => 'regulatory_region_amplification',
        # NOTE(review): same feature_SO_term / feature_class pairing as
        # regulatory_region_ablation; left as found - confirm.
        feature_SO_term       => 'TF_binding_site',
        feature_class         => 'Bio::EnsEMBL::Funcgen::RegulatoryFeature',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '33',
        tier                  => '2',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::feature_amplification',
        description           => 'A feature amplification of a region containing a regulatory region',
        label                 => 'Regulatory region amplification',
    },
#    {
#        SO_accession => 'SO:0001884',
#        SO_term => 'regulatory_region_translocation',
#        feature_SO_term => 'TF_binding_site',
#        feature_class => 'Bio::EnsEMBL::Funcgen::RegulatoryFeature',
#        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
#        rank => '34',
#        tier => '2',
#        predicate => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::regulatory_feature_translocation',
#        description => 'A feature translocation where the region contains a regulatory region',
#        label => 'Regulatory region translocation',
#    },
    {
        SO_accession          => 'SO:0001907',
        SO_term               => 'feature_elongation',
        feature_class         => 'Bio::EnsEMBL::Feature',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '36',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::feature_elongation',
        description           => 'A sequence variant that causes the extension of a genomic feature, with regard to the reference sequence',
        label                 => 'Feature elongation',
    },
    {
        SO_accession          => 'SO:0001906',
        SO_term               => 'feature_truncation',
        feature_class         => 'Bio::EnsEMBL::Feature',
        variant_feature_class => 'Bio::EnsEMBL::Variation::BaseVariationFeature',
        rank                  => '37',
        tier                  => '3',
        predicate             => 'Bio::EnsEMBL::Variation::Utils::VariationEffect::feature_truncation',
        description           => 'A sequence variant that causes the reduction of a genomic feature, with regard to the reference sequence',
        label                 => 'Feature truncation',
    },
);
|
|
724
|
|
# Table of feature types. Each hashref pairs a Sequence Ontology term
# (SO_accession / SO_term) with the class name of the feature
# (ens_feature_class) and the class name of the corresponding variant object
# (ens_variant_class). ens_feature_subtype appears only on the entries that
# define it.
our @FEATURE_TYPES = (

    # Entries with ens_feature_class Bio::EnsEMBL::Transcript
    {
        SO_accession        => 'SO:0000234',
        SO_term             => 'mRNA',
        ens_feature_class   => 'Bio::EnsEMBL::Transcript',
        ens_feature_subtype => 'protein_coding',
        ens_variant_class   => 'Bio::EnsEMBL::Variation::TranscriptVariation',
    },
    {
        SO_accession      => 'SO:0000673',
        SO_term           => 'transcript',
        ens_feature_class => 'Bio::EnsEMBL::Transcript',
        ens_variant_class => 'Bio::EnsEMBL::Variation::TranscriptVariation',
    },
    {
        SO_accession      => 'SO:0000185',
        SO_term           => 'primary_transcript',
        ens_feature_class => 'Bio::EnsEMBL::Transcript',
        ens_variant_class => 'Bio::EnsEMBL::Variation::TranscriptVariation',
    },
    {
        SO_accession      => 'SO:0000655',
        SO_term           => 'ncRNA',
        ens_feature_class => 'Bio::EnsEMBL::Transcript',
        ens_variant_class => 'Bio::EnsEMBL::Variation::TranscriptVariation',
    },
    {
        SO_accession      => 'SO:0000276',
        SO_term           => 'miRNA',
        ens_feature_class => 'Bio::EnsEMBL::Transcript',
        ens_variant_class => 'Bio::EnsEMBL::Variation::TranscriptVariation',
    },

    # Entries with Funcgen RegulatoryFeature / MotifFeature classes
    {
        SO_accession      => 'SO:0005836',
        SO_term           => 'regulatory_region',
        ens_feature_class => 'Bio::EnsEMBL::Funcgen::RegulatoryFeature',
        ens_variant_class => 'Bio::EnsEMBL::Variation::RegulatoryFeatureVariation',
    },
    {
        SO_accession      => 'SO:0000409',
        SO_term           => 'binding_site',
        ens_feature_class => 'Bio::EnsEMBL::Funcgen::MotifFeature',
        ens_variant_class => 'Bio::EnsEMBL::Variation::MotifFeatureVariation',
    },

    # Entries with ens_feature_class Bio::EnsEMBL::Funcgen::ExternalFeature,
    # told apart by ens_feature_subtype
    {
        SO_accession        => 'SO:0005836',
        SO_term             => 'regulatory_region',
        ens_feature_class   => 'Bio::EnsEMBL::Funcgen::ExternalFeature',
        ens_feature_subtype => 'VISTA enhancer set',
        ens_variant_class   => 'Bio::EnsEMBL::Variation::ExternalFeatureVariation',
    },
    {
        SO_accession        => 'SO:0000409',
        SO_term             => 'binding_site',
        ens_feature_class   => 'Bio::EnsEMBL::Funcgen::ExternalFeature',
        ens_feature_subtype => 'cisRED motif',
        ens_variant_class   => 'Bio::EnsEMBL::Variation::ExternalFeatureVariation',
    },
    {
        SO_accession        => 'SO:0005836',
        SO_term             => 'regulatory_region',
        ens_feature_class   => 'Bio::EnsEMBL::Funcgen::ExternalFeature',
        ens_feature_subtype => 'miRanda miRNA target',
        ens_variant_class   => 'Bio::EnsEMBL::Variation::ExternalFeatureVariation',
    },

    # Entry with ens_feature_class Bio::EnsEMBL::Feature
    {
        SO_accession      => 'SO:0000110',
        SO_term           => 'sequence_feature',
        ens_feature_class => 'Bio::EnsEMBL::Feature',
        ens_variant_class => 'Bio::EnsEMBL::Variation::StructuralVariationFeatureOverlap',
    },
);
|
|
797
|
|
# Attrib types are listed as hashrefs in @ATTRIB_TYPES. Each hashref must have
# a value for the key 'code' and may also have values for the keys 'name' and
# 'description'.
#
# NOTE(review): the 'name' of prot_func_analysis used to end in a stray space;
# it is trimmed here so the value is stored without trailing whitespace.
our @ATTRIB_TYPES = (
    {
        code        => 'SO_accession',
        description => 'Sequence Ontology accession',
    },
    {
        code        => 'SO_term',
        description => 'Sequence Ontology term',
    },
    {
        code        => 'display_term',
        description => 'Ensembl display term',
    },
    {
        code        => 'NCBI_term',
        description => 'NCBI term',
    },
    {
        code        => 'feature_SO_term',
        description => 'Sequence Ontology term for the associated feature',
    },
    {
        code        => 'rank',
        description => 'Relative severity of this variation consequence',
    },
    {
        code        => 'polyphen_prediction',
        description => 'PolyPhen-2 prediction',
    },
    {
        code        => 'sift_prediction',
        description => 'SIFT prediction',
    },
    {
        code        => 'short_name',
        name        => 'Short name',
        description => 'A shorter name for an instance, e.g. a VariationSet',
    },
    {
        code        => 'dbsnp_clin_sig',
        name        => 'dbSNP clinical significance',
        description => 'The clinical significance of a variant as reported by dbSNP',
    },
    {
        code        => 'dgva_clin_sig',
        name        => 'DGVa clinical significance',
        description => 'The clinical significance of a structural variant as reported by DGVa',
    },
    {
        code        => 'prot_func_analysis',
        name        => 'Protein function analysis',
        description => 'The program used to make protein function predictions',
    },
);
|
|
854
|
|
# Attribs to be loaded, keyed by attrib_type code. Each value is a listref
# holding the attrib values for that type; the first three reuse the lists
# declared near the top of this file.
our %ATTRIBS = (
    short_name          => \@short_names,
    dbsnp_clin_sig      => \@dbsnp_clinical_significance_types,
    dgva_clin_sig       => \@dgva_clinical_significance_types,
    polyphen_prediction => [
        'probably damaging',
        'possibly damaging',
        'benign',
        'unknown',
    ],
    sift_prediction     => [ 'tolerated', 'deleterious' ],
    prot_func_analysis  => [ 'sift', 'polyphen_humvar', 'polyphen_humdiv' ],
);
|
|
864
|
|
# Attrib sets: each element of @ATTRIB_SETS is a hashref with attrib_type codes
# as keys and attribs as values; new attrib entries will be inserted as
# necessary. The sets are the entries of the three arrays below, concatenated
# in this order. The same hashrefs are shared, not copied.
our @ATTRIB_SETS = map { @$_ } (
    \@VARIATION_CLASSES,
    \@OVERLAP_CONSEQUENCES,
    \@FEATURE_TYPES,
);
|
|
871
|
|
872 1;
|