comparison variant_effect_predictor/Bio/SeqIO/phd.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 # $Id: phd.pm,v 1.17 2002/12/09 23:50:23 matsallac Exp $
2 #
3 # Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved.
4 # This module is free software; you can redistribute it and/or
5 # modify it under the same terms as Perl itself.
6 #
7 # Copyright Chad Matsalla
8 #
9 # You may distribute this module under the same terms as perl itself
10
11 # POD documentation - main docs before the code
12
13 =head1 NAME
14
15 Bio::SeqIO::phd - .phd file input/output stream
16
17 =head1 SYNOPSIS
18
19 Do not use this module directly. Use it via the L<Bio::SeqIO> class.
20
21 =head1 DESCRIPTION
22
23 This object can transform .phd files (from Phil Green's phred basecaller)
24 to and from Bio::Seq::SeqWithQuality objects
25
26 =head1 FEEDBACK
27
28 =head2 Mailing Lists
29
30 User feedback is an integral part of the evolution of this and other
31 Bioperl modules. Send your comments and suggestions preferably to one
32 of the Bioperl mailing lists. Your participation is much appreciated.
33
34 bioperl-l@bioperl.org - General discussion
35 http://www.bioperl.org/MailList.shtml - About the mailing lists
36
37 =head2 Reporting Bugs
38
39 Report bugs to the Bioperl bug tracking system to help us keep track
40 the bugs and their resolution.
41 Bug reports can be submitted via email or the web:
42
43 bioperl-bugs@bio.perl.org
44 http://bugzilla.bioperl.org/
45
46 =head1 AUTHOR Chad Matsalla
47
48 Chad Matsalla
49 bioinformatics@dieselwurks.com
50
51 =head1 CONTRIBUTORS
52
53 Jason Stajich, jason@bioperl.org
54
55 =head1 APPENDIX
56
57 The rest of the documentation details each of the object
58 methods. Internal methods are usually preceded with a _
59
60 =cut
61
62 # 'Let the code begin...
63
64 package Bio::SeqIO::phd;
65 use vars qw(@ISA);
66 use strict;
67 use Bio::SeqIO;
68 use Bio::Seq::SeqFactory;
69
70 @ISA = qw(Bio::SeqIO);
71
72 sub _initialize {
73 my($self,@args) = @_;
74 $self->SUPER::_initialize(@args);
75 if( ! defined $self->sequence_factory ) {
76 $self->sequence_factory(new Bio::Seq::SeqFactory
77 (-verbose => $self->verbose(),
78 -type => 'Bio::Seq::SeqWithQuality'));
79 }
80 }
81
82 =head2 next_seq()
83
84 Title : next_seq()
85 Usage : $swq = $stream->next_seq()
86 Function: returns the next phred sequence in the stream
87 Returns : Bio::Seq::SeqWithQuality object
88 Args : NONE
89 Notes : This is really redundant because AFAIK there is no such thing as
90 a .phd file that contains more then one sequence. It is included as
91 an interface thing and for consistency.
92
93 =cut
94
95 sub next_seq {
96 my ($self,@args) = @_;
97 my ($entry,$done,$qual,$seq);
98 my ($id,@lines, @bases, @qualities) = ('');
99 if (!($entry = $self->_readline)) { return; }
100 if ($entry =~ /^BEGIN_SEQUENCE\s+(\S+)/) {
101 $id = $1;
102 }
103 my $in_dna = 0;
104 my $base_number = 0;
105 while ($entry = $self->_readline) {
106 return if (!$entry);
107 chomp($entry);
108 if ($entry =~ /^BEGIN_CHROMAT:\s+(\S+)/) {
109 # this is where I used to grab the ID
110 if (!$id) {
111 $id = $1;
112 }
113 $entry = $self->_readline();
114 }
115 if ($entry =~ /^BEGIN_DNA/) {
116 $entry =~ /^BEGIN_DNA/;
117 $in_dna = 1;
118 $entry = $self->_readline();
119 }
120 if ($entry =~ /^END_DNA/) {
121 $in_dna = 0;
122 }
123 if ($entry =~ /^END_SEQUENCE/) {
124 }
125 if (!$in_dna) { next; }
126 $entry =~ /(\S+)\s+(\S+)/;
127 push @bases,$1;
128 push @qualities,$2;
129 push(@lines,$entry);
130 }
131 # $self->debug("csmCreating objects with id = $id\n");
132 my $swq = $self->sequence_factory->create
133 (-seq => join('',@bases),
134 -qual => \@qualities,
135 -id => $id,
136 -primary_id => $id,
137 -display_id => $id,
138 );
139 return $swq;
140 }
141
142 =head2 write_seq
143
144 Title : write_seq(-SeqWithQuality => $swq, <comments>)
145 Usage : $obj->write_seq( -SeqWithQuality => $swq,);
146 Function: Write out an scf.
147 Returns : Nothing.
148 Args : Requires: a reference to a SeqWithQuality object to form the
149 basis for the scf. Any other arguments are assumed to be comments
150 and are put into the comments section of the scf. Read the
151 specifications for scf to decide what might be good to put in here.
152 Notes : These are the comments that reside in the header of a phd file
153 at the present time. If not provided in the parameter list for
154 write_phd(), the following default values will be used:
155 CHROMAT_FILE: $swq->id()
156 ABI_THUMBPRINT: 0
157 PHRED_VERSION: 0.980904.e
158 CALL_METHOD: phred
159 QUALITY_LEVELS: 99
160 TIME: <current time>
161 TRACE_ARRAY_MIN_INDEX: 0
162 TRACE_ARRAY_MAX_INDEX: unknown
163 CHEM: unknown
164 DYE: unknown
165 IMPORTANT: This method does not write the trace index where this
166 call was made. All base calls are placed at index 1.
167
168
169 =cut
170
171 sub write_seq {
172 my ($self,@args) = @_;
173 my @phredstack;
174 my ($label,$arg);
175
176 my ($swq, $chromatfile, $abithumb,
177 $phredversion, $callmethod,
178 $qualitylevels,$time,
179 $trace_min_index,
180 $trace_max_index,
181 $chem, $dye
182 ) = $self->_rearrange([qw(SEQWITHQUALITY
183 CHROMAT_FILE
184 ABI_THUMBPRINT
185 PHRED_VERSION
186 CALL_METHOD
187 QUALITY_LEVELS
188 TIME
189 TRACE_ARRAY_MIN_INDEX
190 TRACE_ARRAY_MAX_INDEX
191 CHEM
192 DYE
193 )], @args);
194
195 unless (ref($swq) eq "Bio::Seq::SeqWithQuality") {
196 $self->throw("You must pass a Bio::Seq::SeqWithQuality object to write_scf as a parameter named \"SeqWithQuality\"");
197 }
198 my $id = $swq->id();
199 if (!$id) { $id = "UNDEFINED in SeqWithQuality Object"; }
200 push @phredstack,("BEGIN_SEQUENCE $id","","BEGIN_COMMENT","");
201
202 $chromatfile = 'undefined in write_phd' unless defined $chromatfile;
203 push @phredstack,"CHROMAT_FILE: $chromatfile";
204
205 $abithumb = 0 unless defined $abithumb;
206 push @phredstack,"ABI_THUMBPRINT: $abithumb";
207
208 $phredversion = "0.980904.e" unless defined $phredversion;
209 push @phredstack,"PHRED_VERSION: $phredversion";
210
211 $callmethod = 'phred' unless defined $callmethod;
212 push @phredstack,"CALL_METHOD: $callmethod";
213
214 $qualitylevels = 99 unless defined $qualitylevels;
215 push @phredstack,"QUALITY_LEVELS: $qualitylevels";
216
217 $time = localtime() unless defined $time;
218 push @phredstack,"TIME: $time";
219
220 $trace_min_index = 0 unless defined $trace_min_index;
221 push @phredstack,"TRACE_ARRAY_MIN_INDEX: $trace_min_index";
222
223 $trace_max_index = '10000' unless defined $trace_max_index;
224 push @phredstack,"TRACE_ARRAY_MAX_INDEX: $trace_max_index";
225
226 $chem = 'unknown' unless defined $chem;
227 push @phredstack,"CHEM: $chem";
228
229 $dye = 'unknown' unless defined $dye;
230 push @phredstack, "DYE: $dye";
231
232 push @phredstack,("END_COMMENT","","BEGIN_DNA");
233
234 foreach (@phredstack) { $self->_print($_."\n"); }
235
236 my $length = $swq->length();
237 if ($length eq "DIFFERENT") {
238 $self->throw("Can't create the phd because the sequence and the quality in the SeqWithQuality object are of different lengths.");
239 }
240 for (my $curr = 1; $curr<=$length; $curr++) {
241 $self->_print (uc($swq->baseat($curr))." ".
242 $swq->qualat($curr)." 10".
243 "\n");
244 }
245 $self->_print ("END_DNA\n\nEND_SEQUENCE\n");
246
247 $self->flush if $self->_flush_on_write && defined $self->_fh;
248 return 1;
249 }
250
251 1;
252 __END__