annotate variant_effect_predictor/Bio/AlignIO/nexus.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: nexus.pm,v 1.12.2.1 2003/04/07 15:17:17 heikki Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl module for Bio::AlignIO::nexus
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Copyright Heikki Lehvaslaiho
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 Bio::AlignIO::nexus - NEXUS format sequence input/output stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 Do not use this module directly. Use it via the L<Bio::AlignIO> class.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 This object can transform L<Bio::Align::AlignI> objects to and from NEXUS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 data blocks. See method documentation for supported NEXUS features.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 =head1 ACKNOWLEDGEMENTS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 Will Fisher has written an excellent standalone NEXUS format parser in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 perl, readnexus. A number of tricks were adapted from it.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 the bugs and their resolution.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 Bug reports can be submitted via email or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 =head1 AUTHORS - Heikki Lehvaslaiho
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 Email: heikki@ebi.ac.uk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 The rest of the documentation details each of the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 methods. Internal methods are usually preceded with a _
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 # Let the code begin...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 package Bio::AlignIO::nexus;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 use vars qw(@ISA %valid_type);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 no strict "refs";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 use Bio::AlignIO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 @ISA = qw(Bio::AlignIO);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 BEGIN {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 %valid_type = map {$_, 1} qw( dna rna protein standard);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 =head2 next_aln
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 Title : next_aln
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 Usage : $aln = $stream->next_aln()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 Function: Returns the next alignment in the stream.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 Supports the following NEXUS format features:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 - The file has to start with '#NEXUS'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 - Reads in the name of the alignment from a comment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 (anything after 'TITLE: ') .
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 - Sequence names can be given in a taxa block, too.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 - If matchchar notation is used, converts
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 them back to sequence characters.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 - Does character conversions specified in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 NEXUS equate command.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 - Sequence names of type 'Homo sapiens' and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 Homo_sapiens are treated identically.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 Returns : L<Bio::Align::AlignI> object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 Args :
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 sub next_aln {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 my $entry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 my ($aln_name, $seqcount, $residuecount, %hash, $alphabet,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 $match, $gap, $missing, $equate, $interleave,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 $name,$str,@names,$seqname,$start,$end,$count,$seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 my $aln = Bio::SimpleAlign->new(-source => 'nexus');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 # file starts with '#NEXUS' but we allow white space only lines before it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 $entry = $self->_readline;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 $entry = $self->_readline while $entry =~ /^\s+$/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 return unless $entry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 $self->throw("Not a valid interleaved NEXUS file! [#NEXUS] not starting the file\n$entry")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 unless $entry =~ /^#NEXUS/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 # skip anything before either the taxa or data block
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 # but read in the optional title in a comment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 while (defined($entry = $self->_readline)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 local ($_) = $entry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 /\[TITLE. *([^\]]+)]\s+/i and $aln_name = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 last if /^begin +data/i || /^begin +taxa/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 $aln_name =~ s/\s/_/g and $aln->id($aln_name) if $aln_name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 # data and taxa blocks
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 my $taxlabels;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 while ($entry = $self->_readline) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 local ($_) = $entry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 # read in seq names if in taxa block
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 $taxlabels = 1 if /taxlabels/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 if ($taxlabels) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 @names = $self->_read_taxlabels;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 $taxlabels = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 /ntax ?= ?(\d+)/i and $seqcount = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 /nchar ?= ?(\d+)/i and $residuecount = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 /matchchar ?= ?(.)/i and $match = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 /gap ?= ?(.)/i and $gap = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 /missing ?= ?(.)/i and $missing = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 /equate ?= ?"([^\"]+)/i and $equate = $1; # "e.g. equate="T=C G=A";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 /datatype ?= ?(\w+)/i and $alphabet = lc $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 /interleave/i and $interleave = 1 ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 last if /matrix/i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 $self->throw("Not a valid NEXUS sequence file. Datatype not specified")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 unless $alphabet;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 $self->throw("Not a valid NEXUS sequence file. Datatype should not be [$alphabet]")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 unless $valid_type{$alphabet};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 $aln->gap_char($gap);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 $aln->missing_char($missing);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 # if data is not right after the matrix line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 # read the empty lines out
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 while ($entry = $self->_readline) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 unless ($entry =~ /^\s+$/) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 $self->_pushback($entry);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 # matrix command
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 # first alignment section
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 if (@names == 0) { # taxa block did not exist
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 while ($entry = $self->_readline) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 local ($_) = $entry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 s/\[[^[]+\]//g; #] remove comments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 if ($interleave) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 /^\s+$/ and last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 /^\s+$/ and next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 /^\s*;\s*$/ and last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 if (/^\s*('([^']*?)'|([^']\S*))\s+(.*)\s$/) { #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 $name = ($2 || $3);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 $str = $4;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 $name =~ s/ /_/g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 push @names, $name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 $str =~ s/\s//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 $count = @names;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 $hash{$count} = $str;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 $self->throw("Not a valid interleaved NEXUS file!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 seqcount [$count] > predeclared [$seqcount] in the first section") if $count > $seqcount;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 # interleaved sections
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 $count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 while( $entry = $self->_readline) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 local ($_) = $entry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 s/\[[^[]+\]//g; #] remove comments
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 last if /^\s*;/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 $count = 0, next if $entry =~ /^\s*$/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 if (/^\s*('([^']*?)'|([^']\S*))\s+(.*)\s$/) { #'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 $str = $4;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 $str =~ s/\s//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 $count++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 $hash{$count} .= $str;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 $self->throw("Not a valid interleaved NEXUS file!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 seqcount [$count] > predeclared [$seqcount] ") if $count > $seqcount;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 return 0 if @names < 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 # sequence creation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 $count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 foreach $name ( @names ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 $count++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 $seqname = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 $start = $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 $end = $3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 $seqname=$name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 $start = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 $str = $hash{$count};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 $str =~ s/[^A-Za-z]//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 $end = length($str);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 # consistency test
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 $self->throw("Length of sequence [$seqname] is not [$residuecount]! ")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 unless CORE::length($hash{$count}) == $residuecount;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 $seq = new Bio::LocatableSeq('-seq'=>$hash{$count},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 '-id'=>$seqname,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 '-start'=>$start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 '-end'=>$end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 'alphabet'=>$alphabet
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 $aln->add_seq($seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 # if matchchar is used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 $aln->unmatch($match) if $match;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 # if equate ( e.g. equate="T=C G=A") is used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 if ($equate) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 $aln->map_chars($1, $2) while $equate =~ /(\S)=(\S)/g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 while ($entry !~ /endblock/i) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 $entry = $self->_readline;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 return $aln;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 sub _read_taxlabels {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 my ($self) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 my ($name, @names);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 while (my $entry = $self->_readline) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 ($name) = $entry =~ /\s*(\S+)\s+/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 $name =~ s/\[[^\[]+\]//g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 $name =~ s/\W/_/g;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 push @names, $name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 last if /^\s*;/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 return @names;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 =head2 write_aln
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 Title : write_aln
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 Usage : $stream->write_aln(@aln)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 Function: Writes the $aln object into the stream in interleaved NEXUS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 format. Everything is written into a data block.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 SimpleAlign methods match_char, missing_char and gap_char must be set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 if you want to see them in the output.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 Returns : 1 for success and 0 for error
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 Args : L<Bio::Align::AlignI> object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 sub write_aln {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 my ($self,@aln) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 my $count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 my $wrapped = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 my $maxname;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 my ($length,$date,$name,$seq,$miss,$pad,%hash,@arr,$tempcount,$index );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 my ($match, $missing, $gap,$symbols) = ('', '', '','');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 foreach my $aln (@aln) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 $self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 $self->throw("All sequences in the alignment must be the same length")
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 unless $aln->is_flush($self->verbose);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 $length = $aln->length();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 $self->_print (sprintf("#NEXUS\n[TITLE: %s]\n\nbegin data;\ndimensions ntax=%s nchar=%s;\n",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 $aln->id, $aln->no_sequences, $length));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 $match = "match=". $aln->match_char if $aln->match_char;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 $missing = "missing=". $aln->missing_char if $aln->missing_char;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 $gap = "gap=". $aln->gap_char if $aln->gap_char;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 $symbols = 'symbols="'.join('',$aln->symbol_chars). '"' if( $aln->symbol_chars);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 $self->_print (sprintf("format interleave datatype=%s %s %s %s %s;\n\nmatrix\n",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 $aln->get_seq_by_pos(1)->alphabet, $match, $missing, $gap, $symbols));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 my $indent = $aln->maxdisplayname_length;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 $aln->set_displayname_flat();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 foreach $seq ( $aln->each_seq() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 $name = $aln->displayname($seq->get_nse());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 $name = sprintf("%-${indent}s", $name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 $hash{$name} = $seq->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 push(@arr,$name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 while( $count < $length ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 # there is another block to go!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 foreach $name ( @arr ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 my $dispname = $name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 # $dispname = '' if $wrapped;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 $self->_print (sprintf("%${indent}s ",$dispname));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 $tempcount = $count;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 $index = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 while( ($tempcount + 10 < $length) && ($index < 5) ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 $self->_print (sprintf("%s ",substr($hash{$name},$tempcount,10)));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 $tempcount += 10;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 $index++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 # last
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 if( $index < 5) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 # space to print!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 $self->_print (sprintf("%s ",substr($hash{$name},$tempcount)));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 $tempcount += 10;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 $self->_print ("\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 $self->_print ("\n\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 $count = $tempcount;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 $wrapped = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 $self->_print (";\n\nendblock;\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 $self->flush if $self->_flush_on_write && defined $self->_fh;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 1;