annotate variant_effect_predictor/Bio/EnsEMBL/Utils/Sequence.pm @ 0:21066c0abaf5 draft

Uploaded
author willmclaren
date Fri, 03 Aug 2012 10:04:48 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 =head1 LICENSE
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 This software is distributed under a modified Apache license.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 For license details, please see
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 =head1 CONTACT
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14 developers list at <dev@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 <helpdesk@ensembl.org>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23 Bio::EnsEMBL::Utils::Sequence - Utility functions for sequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp expand);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29 my $seq = 'ACTTTAAAGGCTATCCCAATATG';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 print "my sequence = $seq\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 reverse_comp( \$seq );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 print "my reverse comp = $seq\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 my $compressed_seq = '(AC)3';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 print "my expanded seq is = expand($compressed_seq)";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 =head1 METHODS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46 package Bio::EnsEMBL::Utils::Sequence;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49 use warnings;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 use Exporter;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53 use vars qw(@ISA @EXPORT_OK);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55 @ISA = qw(Exporter);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57 @EXPORT_OK = qw(&reverse_comp &expand);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 =head2 reverse_comp
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 Arg [1] : reference to a string $seqref
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63 Example : use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65 $seq = 'ACCTGAA';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66 reverse_comp(\$seq);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67 print $seq;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 Description: Does an in place reverse compliment of a passed in string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70 reference. The string is passed by reference
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 rather than by value for memory efficiency.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72 Returntype : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74 Caller : SequenceAdaptor, SliceAdaptor
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78 sub reverse_comp {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 my $seqref = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 $$seqref = reverse( $$seqref );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82 $$seqref =~
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85 return;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88 =head2 expand
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 Arg [1] : reference to a string $seqref
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91 Example : use Bio::EnsEMBL::Utils::Sequence qw(expand);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93 $seq = '(AC)3';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94 expand(\$seq);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95 print $seq;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98 Description: Expands a genomic sequence. The string is passed by reference
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99 rather than by value for memory efficiency.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100 Returntype : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101 Exceptions : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102 Caller : SequenceAdaptor, SliceAdaptor
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106 sub expand {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107 my $seq_ref = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108 $$seq_ref =~ s/(\w*)\((\w+)\)(\d+)/$1.$2 x $3/eg if ($$seq_ref =~ /\(/);#expressions with parenthesis, expand the alleles
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 return;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113 1;