=head1 LICENSE

  Copyright (c) 1999-2012 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    http://www.ensembl.org/info/about/code_licence.html

=head1 CONTACT

  Please email comments or questions to the public Ensembl
  developers list at <dev@ensembl.org>.

  Questions may also be sent to the Ensembl help desk at
  <helpdesk@ensembl.org>.

=cut

=head1 NAME

Bio::EnsEMBL::Utils::Sequence - Utility functions for sequences

=head1 SYNOPSIS

  use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp expand);

  my $seq = 'ACTTTAAAGGCTATCCCAATATG';

  print "my sequence = $seq\n";

  reverse_comp( \$seq );

  print "my reverse comp = $seq\n";

  my $compressed_seq = '(AC)3';

  expand( \$compressed_seq );

  print "my expanded seq is = $compressed_seq\n";

=head1 METHODS

=cut


package Bio::EnsEMBL::Utils::Sequence;

use strict;
use warnings;

use Exporter;

# Package globals read by Exporter.  They are declared with 'our' (the
# replacement for the obsolete 'use vars' pragma) so that they remain
# visible as @Bio::EnsEMBL::Utils::Sequence::ISA / ::EXPORT_OK.
our @ISA = qw(Exporter);

# Nothing is exported by default; callers must request each function by
# name, e.g.  use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp expand);
our @EXPORT_OK = qw(&reverse_comp &expand);


=head2 reverse_comp

  Arg [1]    : reference to a string $seqref
  Example    : use Bio::EnsEMBL::Utils::Sequence qw(reverse_comp);

               $seq = 'ACCTGAA';
               reverse_comp(\$seq);
               print $seq;    # prints TTCAGGT

  Description: Does an in place reverse complement of a passed in string
               reference.  The string is passed by reference
               rather than by value for memory efficiency.
               Both the plain bases (ACGT) and the ambiguity codes
               R, Y, M, K, S, W, H, B, V, D, N and X are complemented,
               in upper and lower case; case is preserved.  Any other
               character (e.g. '-') is reversed in position but left
               unchanged.
  Returntype : none
  Exceptions : none
  Caller     : SequenceAdaptor, SliceAdaptor

=cut

sub reverse_comp {
  my $seqref = shift;

  # Step 1: reverse the character order.  'scalar' makes the string
  # (rather than list) form of reverse explicit.
  $$seqref = scalar reverse( $$seqref );

  # Step 2: swap every code for its complement.  S, W, N and X are their
  # own complements, so they map onto themselves.
  $$seqref =~
    tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;

  return;
}

=head2 expand

  Arg [1]    : reference to a string $seqref
  Example    : use Bio::EnsEMBL::Utils::Sequence qw(expand);

               $seq = '(AC)3';
               expand(\$seq);
               print $seq;    # prints ACACAC

  Description: Expands a compressed genomic sequence in place: every
               occurrence of '(UNIT)N', where UNIT is one or more word
               characters and N is a decimal repeat count, is replaced
               by UNIT repeated N times.  Text outside such groups is
               left untouched.  The string is scanned once only, so
               nested groups are not expanded recursively.
               The string is passed by reference
               rather than by value for memory efficiency.
  Returntype : none
  Exceptions : none
  Caller     : SequenceAdaptor, SliceAdaptor

=cut

sub expand {
  my $seq_ref = shift;

  # Most sequences contain no compressed groups at all; skip the
  # substitution entirely unless there is at least one parenthesis.
  return if $$seq_ref !~ /\(/;

  # Expand each '(UNIT)N' group.  Only the group itself is matched: the
  # text in front of it does not need to be captured and re-emitted, and
  # leaving it out avoids needless backtracking on malformed input.
  $$seq_ref =~ s/\((\w+)\)(\d+)/$1 x $2/eg;

  return;
}


1;