| 0 | 1 =head1 LICENSE | 
|  | 2 | 
|  | 3 Copyright [1999-2015] Wellcome Trust Sanger Institute and the EMBL-European Bioinformatics Institute | 
|  | 4 Copyright [2016-2018] EMBL-European Bioinformatics Institute | 
|  | 5 | 
|  | 6 Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 7 you may not use this file except in compliance with the License. | 
|  | 8 You may obtain a copy of the License at | 
|  | 9 | 
|  | 10      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 11 | 
|  | 12 Unless required by applicable law or agreed to in writing, software | 
|  | 13 distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 15 See the License for the specific language governing permissions and | 
|  | 16 limitations under the License. | 
|  | 17 | 
|  | 18 =head1 CONTACT | 
|  | 19 | 
|  | 20  Ensembl <http://www.ensembl.org/info/about/contact/index.html> | 
|  | 21 | 
|  | 22 =cut | 
|  | 23 | 
|  | 24 =head1 NAME | 
|  | 25 | 
|  | 26   CSN | 
|  | 27 | 
|  | 28 =head1 SYNOPSIS | 
|  | 29 | 
|  | 30   mv CSN.pm ~/.vep/Plugins | 
|  | 31   ./vep -i variations.vcf --cache --plugin CSN | 
|  | 32 | 
|  | 33 =head1 DESCRIPTION | 
|  | 34 | 
|  | 35   This is a plugin for the Ensembl Variant Effect Predictor (VEP) that | 
|  | 36   reports Clinical Sequencing Nomenclature (CSN) for variants. | 
|  | 37 | 
|  | 38   Each notation is given with reference to the transcript identifier; | 
|  | 39   specify "--plugin CSN,1" to remove this identifier from the CSN string. | 
|  | 40 | 
|  | 41   You may also wish to specify "--no_escape" to prevent the "=" in "p.=" | 
|  | 42   notations being converted to the URI-escaped equivalent "p.%3D"; doing | 
|  | 43   so may break parsers looking for "=" as a KEY=VALUE separator. | 
|  | 44 | 
|  | 45   See http://biorxiv.org/content/early/2015/03/21/016808.1 | 
|  | 46 | 
|  | 47 =cut | 
|  | 48 | 
|  | 49 package CSN; | 
|  | 50 | 
|  | 51 use strict; | 
|  | 52 use warnings; | 
|  | 53 | 
|  | 54 use Bio::EnsEMBL::Variation::Utils::BaseVepPlugin; | 
|  | 55 use Bio::EnsEMBL::Variation::DBSQL::TranscriptVariationAdaptor; | 
|  | 56 use Bio::EnsEMBL::Variation::DBSQL::DBAdaptor; | 
|  | 57 | 
|  | 58 use base qw(Bio::EnsEMBL::Variation::Utils::BaseVepPlugin); | 
|  | 59 | 
# Constructor.
#
# Delegates object creation to the parent class, checks that the VEP
# configuration gives the plugin a usable sequence source, sets the two
# DEFAULT_SHIFT_HGVS_VARIANTS_3PRIME package globals, and stores the first
# plugin parameter.
#
# Arguments : whatever the parent constructor accepts (passed through as-is)
# Returns   : the plugin object
# Dies      : when no FASTA file is configured and VEP runs in offline mode
sub new {
  my $class = shift;

  my $self = $class->SUPER::new(@_);

  # check config is OK

  # no FASTA file defined: sequence must come from somewhere else
  if(!defined($self->{config}->{fasta})) {

    # offline mode won't work without FASTA; this is a fatal misconfiguration,
    # so it must be reported even when --quiet is in effect
    die("ERROR: Cannot generate CSN without either a FASTA file (--fasta) or a database connection (--cache or --database)\n") if defined($self->{config}->{offline});

    # cache mode will work, but DB will be accessed; this is only advisory,
    # so --quiet suppresses it
    warn("WARNING: Database will be accessed using this plugin; use a FASTA file (--fasta) for optimal performance") if defined($self->{config}->{cache}) and !defined($self->{config}->{quiet});
  }

  # NOTE(review): presumably these flags turn on 3' shifting of HGVS
  # notations in the variation API - confirm against the adaptor modules.
  # Each global is mentioned only once in this file, hence the pragma.
  {
    no warnings 'once';
    $Bio::EnsEMBL::Variation::DBSQL::TranscriptVariationAdaptor::DEFAULT_SHIFT_HGVS_VARIANTS_3PRIME = 1;
    $Bio::EnsEMBL::Variation::DBSQL::DBAdaptor::DEFAULT_SHIFT_HGVS_VARIANTS_3PRIME = 1;
  }

  # first plugin parameter ("--plugin CSN,1"): when true, run() strips the
  # transcript identifier from the coding part of the CSN string
  $self->{remove_transcript_ID} = $self->params->[0];

  return $self;
}
|  | 86 | 
# Feature types this plugin declares: transcripts only.
# Returns a fresh array reference on every call.
sub feature_types {
  my @types = ('Transcript');
  return \@types;
}
|  | 90 | 
# Variant feature types this plugin declares: VariationFeature only.
# Returns a fresh array reference on every call.
sub variant_feature_types {
  my @types = ('VariationFeature');
  return \@types;
}
|  | 94 | 
# Header description for the single output field this plugin adds.
# Returns a hash reference mapping the field name (CSN) to its description.
sub get_header_info {
  my %header = (
    CSN => 'Clinical Sequencing Nomenclature',
  );

  return \%header;
}
|  | 98 | 
# Builds the CSN string for one allele.
#
# Arguments : $self - plugin object; reads {remove_transcript_ID} and
#                     {config}->{no_escape}
#             $tva  - object providing hgvs_transcript() and hgvs_protein()
# Returns   : {} when there is no transcript-level HGVS notation, otherwise
#             { CSN => "<coding>" } or { CSN => "<coding>_<protein>" }
sub run {
  my $self = shift;
  my $tva  = shift;

  # both notations are fetched up front; a missing one becomes ''
  my $coding  = $tva->hgvs_transcript || '';
  my $protein = $tva->hgvs_protein || '';

  # nothing to report without a coding notation
  if(!$coding) {
    return {};
  }

  # the identifier prefix on the coding part is dropped only on request ...
  if($self->{remove_transcript_ID}) {
    $coding =~ s/.+\://;
  }

  # ... whereas the protein part always loses it
  $protein =~ s/.+\://;

  # stop codons are written as X rather than Ter
  $protein =~ s/Ter/X/g;

  # any notation containing "p.=" collapses to just "p.="
  if($protein =~ /p\.\=/) {
    $protein = 'p.=';
  }

  # URI-escape "=" unless escaping has been switched off
  if(!$self->{config}->{no_escape}) {
    $protein =~ s/\=/\%3D/g;
  }

  # the protein part is appended only when there is one
  my $csn = $coding;
  if($protein) {
    $csn .= '_'.$protein;
  }

  return { CSN => $csn };
}
|  | 121 | 
|  | 122 1; |