0
|
1 # $Id: phd.pm,v 1.17 2002/12/09 23:50:23 matsallac Exp $
|
|
2 #
|
|
3 # Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved.
|
|
4 # This module is free software; you can redistribute it and/or
|
|
5 # modify it under the same terms as Perl itself.
|
|
6 #
|
|
7 # Copyright Chad Matsalla
|
|
8 #
|
|
9 # You may distribute this module under the same terms as perl itself
|
|
10
|
|
11 # POD documentation - main docs before the code
|
|
12
|
|
13 =head1 NAME
|
|
14
|
|
15 Bio::SeqIO::phd - .phd file input/output stream
|
|
16
|
|
17 =head1 SYNOPSIS
|
|
18
|
|
19 Do not use this module directly. Use it via the L<Bio::SeqIO> class.
|
|
20
|
|
21 =head1 DESCRIPTION
|
|
22
|
|
23 This object can transform .phd files (from Phil Green's phred basecaller)
|
|
24 to and from Bio::Seq::SeqWithQuality objects
|
|
25
|
|
26 =head1 FEEDBACK
|
|
27
|
|
28 =head2 Mailing Lists
|
|
29
|
|
30 User feedback is an integral part of the evolution of this and other
|
|
31 Bioperl modules. Send your comments and suggestions preferably to one
|
|
32 of the Bioperl mailing lists. Your participation is much appreciated.
|
|
33
|
|
34 bioperl-l@bioperl.org - General discussion
|
|
35 http://www.bioperl.org/MailList.shtml - About the mailing lists
|
|
36
|
|
37 =head2 Reporting Bugs
|
|
38
|
|
39 Report bugs to the Bioperl bug tracking system to help us keep track
|
|
40 the bugs and their resolution.
|
|
41 Bug reports can be submitted via email or the web:
|
|
42
|
|
43 bioperl-bugs@bio.perl.org
|
|
44 http://bugzilla.bioperl.org/
|
|
45
|
|
46 =head1 AUTHOR Chad Matsalla
|
|
47
|
|
48 Chad Matsalla
|
|
49 bioinformatics@dieselwurks.com
|
|
50
|
|
51 =head1 CONTRIBUTORS
|
|
52
|
|
53 Jason Stajich, jason@bioperl.org
|
|
54
|
|
55 =head1 APPENDIX
|
|
56
|
|
57 The rest of the documentation details each of the object
|
|
58 methods. Internal methods are usually preceded with a _
|
|
59
|
|
60 =cut
|
|
61
|
|
62 # 'Let the code begin...
|
|
63
|
|
64 package Bio::SeqIO::phd;
|
|
65 use vars qw(@ISA);
|
|
66 use strict;
|
|
67 use Bio::SeqIO;
|
|
68 use Bio::Seq::SeqFactory;
|
|
69
|
|
70 @ISA = qw(Bio::SeqIO);
|
|
71
|
|
72 sub _initialize {
|
|
73 my($self,@args) = @_;
|
|
74 $self->SUPER::_initialize(@args);
|
|
75 if( ! defined $self->sequence_factory ) {
|
|
76 $self->sequence_factory(new Bio::Seq::SeqFactory
|
|
77 (-verbose => $self->verbose(),
|
|
78 -type => 'Bio::Seq::SeqWithQuality'));
|
|
79 }
|
|
80 }
|
|
81
|
|
82 =head2 next_seq()
|
|
83
|
|
84 Title : next_seq()
|
|
85 Usage : $swq = $stream->next_seq()
|
|
86 Function: returns the next phred sequence in the stream
|
|
87 Returns : Bio::Seq::SeqWithQuality object
|
|
88 Args : NONE
|
|
89 Notes : This is really redundant because AFAIK there is no such thing as
|
|
90 a .phd file that contains more then one sequence. It is included as
|
|
91 an interface thing and for consistency.
|
|
92
|
|
93 =cut
|
|
94
|
|
95 sub next_seq {
|
|
96 my ($self,@args) = @_;
|
|
97 my ($entry,$done,$qual,$seq);
|
|
98 my ($id,@lines, @bases, @qualities) = ('');
|
|
99 if (!($entry = $self->_readline)) { return; }
|
|
100 if ($entry =~ /^BEGIN_SEQUENCE\s+(\S+)/) {
|
|
101 $id = $1;
|
|
102 }
|
|
103 my $in_dna = 0;
|
|
104 my $base_number = 0;
|
|
105 while ($entry = $self->_readline) {
|
|
106 return if (!$entry);
|
|
107 chomp($entry);
|
|
108 if ($entry =~ /^BEGIN_CHROMAT:\s+(\S+)/) {
|
|
109 # this is where I used to grab the ID
|
|
110 if (!$id) {
|
|
111 $id = $1;
|
|
112 }
|
|
113 $entry = $self->_readline();
|
|
114 }
|
|
115 if ($entry =~ /^BEGIN_DNA/) {
|
|
116 $entry =~ /^BEGIN_DNA/;
|
|
117 $in_dna = 1;
|
|
118 $entry = $self->_readline();
|
|
119 }
|
|
120 if ($entry =~ /^END_DNA/) {
|
|
121 $in_dna = 0;
|
|
122 }
|
|
123 if ($entry =~ /^END_SEQUENCE/) {
|
|
124 }
|
|
125 if (!$in_dna) { next; }
|
|
126 $entry =~ /(\S+)\s+(\S+)/;
|
|
127 push @bases,$1;
|
|
128 push @qualities,$2;
|
|
129 push(@lines,$entry);
|
|
130 }
|
|
131 # $self->debug("csmCreating objects with id = $id\n");
|
|
132 my $swq = $self->sequence_factory->create
|
|
133 (-seq => join('',@bases),
|
|
134 -qual => \@qualities,
|
|
135 -id => $id,
|
|
136 -primary_id => $id,
|
|
137 -display_id => $id,
|
|
138 );
|
|
139 return $swq;
|
|
140 }
|
|
141
|
|
142 =head2 write_seq
|
|
143
|
|
144 Title : write_seq(-SeqWithQuality => $swq, <comments>)
|
|
145 Usage : $obj->write_seq( -SeqWithQuality => $swq,);
|
|
146 Function: Write out an scf.
|
|
147 Returns : Nothing.
|
|
148 Args : Requires: a reference to a SeqWithQuality object to form the
|
|
149 basis for the scf. Any other arguments are assumed to be comments
|
|
150 and are put into the comments section of the scf. Read the
|
|
151 specifications for scf to decide what might be good to put in here.
|
|
152 Notes : These are the comments that reside in the header of a phd file
|
|
153 at the present time. If not provided in the parameter list for
|
|
154 write_phd(), the following default values will be used:
|
|
155 CHROMAT_FILE: $swq->id()
|
|
156 ABI_THUMBPRINT: 0
|
|
157 PHRED_VERSION: 0.980904.e
|
|
158 CALL_METHOD: phred
|
|
159 QUALITY_LEVELS: 99
|
|
160 TIME: <current time>
|
|
161 TRACE_ARRAY_MIN_INDEX: 0
|
|
162 TRACE_ARRAY_MAX_INDEX: unknown
|
|
163 CHEM: unknown
|
|
164 DYE: unknown
|
|
165 IMPORTANT: This method does not write the trace index where this
|
|
166 call was made. All base calls are placed at index 1.
|
|
167
|
|
168
|
|
169 =cut
|
|
170
|
|
171 sub write_seq {
|
|
172 my ($self,@args) = @_;
|
|
173 my @phredstack;
|
|
174 my ($label,$arg);
|
|
175
|
|
176 my ($swq, $chromatfile, $abithumb,
|
|
177 $phredversion, $callmethod,
|
|
178 $qualitylevels,$time,
|
|
179 $trace_min_index,
|
|
180 $trace_max_index,
|
|
181 $chem, $dye
|
|
182 ) = $self->_rearrange([qw(SEQWITHQUALITY
|
|
183 CHROMAT_FILE
|
|
184 ABI_THUMBPRINT
|
|
185 PHRED_VERSION
|
|
186 CALL_METHOD
|
|
187 QUALITY_LEVELS
|
|
188 TIME
|
|
189 TRACE_ARRAY_MIN_INDEX
|
|
190 TRACE_ARRAY_MAX_INDEX
|
|
191 CHEM
|
|
192 DYE
|
|
193 )], @args);
|
|
194
|
|
195 unless (ref($swq) eq "Bio::Seq::SeqWithQuality") {
|
|
196 $self->throw("You must pass a Bio::Seq::SeqWithQuality object to write_scf as a parameter named \"SeqWithQuality\"");
|
|
197 }
|
|
198 my $id = $swq->id();
|
|
199 if (!$id) { $id = "UNDEFINED in SeqWithQuality Object"; }
|
|
200 push @phredstack,("BEGIN_SEQUENCE $id","","BEGIN_COMMENT","");
|
|
201
|
|
202 $chromatfile = 'undefined in write_phd' unless defined $chromatfile;
|
|
203 push @phredstack,"CHROMAT_FILE: $chromatfile";
|
|
204
|
|
205 $abithumb = 0 unless defined $abithumb;
|
|
206 push @phredstack,"ABI_THUMBPRINT: $abithumb";
|
|
207
|
|
208 $phredversion = "0.980904.e" unless defined $phredversion;
|
|
209 push @phredstack,"PHRED_VERSION: $phredversion";
|
|
210
|
|
211 $callmethod = 'phred' unless defined $callmethod;
|
|
212 push @phredstack,"CALL_METHOD: $callmethod";
|
|
213
|
|
214 $qualitylevels = 99 unless defined $qualitylevels;
|
|
215 push @phredstack,"QUALITY_LEVELS: $qualitylevels";
|
|
216
|
|
217 $time = localtime() unless defined $time;
|
|
218 push @phredstack,"TIME: $time";
|
|
219
|
|
220 $trace_min_index = 0 unless defined $trace_min_index;
|
|
221 push @phredstack,"TRACE_ARRAY_MIN_INDEX: $trace_min_index";
|
|
222
|
|
223 $trace_max_index = '10000' unless defined $trace_max_index;
|
|
224 push @phredstack,"TRACE_ARRAY_MAX_INDEX: $trace_max_index";
|
|
225
|
|
226 $chem = 'unknown' unless defined $chem;
|
|
227 push @phredstack,"CHEM: $chem";
|
|
228
|
|
229 $dye = 'unknown' unless defined $dye;
|
|
230 push @phredstack, "DYE: $dye";
|
|
231
|
|
232 push @phredstack,("END_COMMENT","","BEGIN_DNA");
|
|
233
|
|
234 foreach (@phredstack) { $self->_print($_."\n"); }
|
|
235
|
|
236 my $length = $swq->length();
|
|
237 if ($length eq "DIFFERENT") {
|
|
238 $self->throw("Can't create the phd because the sequence and the quality in the SeqWithQuality object are of different lengths.");
|
|
239 }
|
|
240 for (my $curr = 1; $curr<=$length; $curr++) {
|
|
241 $self->_print (uc($swq->baseat($curr))." ".
|
|
242 $swq->qualat($curr)." 10".
|
|
243 "\n");
|
|
244 }
|
|
245 $self->_print ("END_DNA\n\nEND_SEQUENCE\n");
|
|
246
|
|
247 $self->flush if $self->_flush_on_write && defined $self->_fh;
|
|
248 return 1;
|
|
249 }
|
|
250
|
|
251 1;
|
|
252 __END__
|