Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/SeqIO/phd.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # $Id: phd.pm,v 1.17 2002/12/09 23:50:23 matsallac Exp $ | |
| 2 # | |
| 3 # Copyright (c) 1997-2001 bioperl, Chad Matsalla. All Rights Reserved. | |
| 4 # This module is free software; you can redistribute it and/or | |
| 5 # modify it under the same terms as Perl itself. | |
| 6 # | |
| 7 # Copyright Chad Matsalla | |
| 8 # | |
| 9 # You may distribute this module under the same terms as perl itself | |
| 10 | |
| 11 # POD documentation - main docs before the code | |
| 12 | |
| 13 =head1 NAME | |
| 14 | |
| 15 Bio::SeqIO::phd - .phd file input/output stream | |
| 16 | |
| 17 =head1 SYNOPSIS | |
| 18 | |
| 19 Do not use this module directly. Use it via the L<Bio::SeqIO> class. | |
| 20 | |
| 21 =head1 DESCRIPTION | |
| 22 | |
| 23 This object can transform .phd files (from Phil Green's phred basecaller) | |
| 24 to and from Bio::Seq::SeqWithQuality objects | |
| 25 | |
| 26 =head1 FEEDBACK | |
| 27 | |
| 28 =head2 Mailing Lists | |
| 29 | |
| 30 User feedback is an integral part of the evolution of this and other | |
| 31 Bioperl modules. Send your comments and suggestions preferably to one | |
| 32 of the Bioperl mailing lists. Your participation is much appreciated. | |
| 33 | |
| 34 bioperl-l@bioperl.org - General discussion | |
| 35 http://www.bioperl.org/MailList.shtml - About the mailing lists | |
| 36 | |
| 37 =head2 Reporting Bugs | |
| 38 | |
| 39 Report bugs to the Bioperl bug tracking system to help us keep track | |
| 40 the bugs and their resolution. | |
| 41 Bug reports can be submitted via email or the web: | |
| 42 | |
| 43 bioperl-bugs@bio.perl.org | |
| 44 http://bugzilla.bioperl.org/ | |
| 45 | |
| 46 =head1 AUTHOR Chad Matsalla | |
| 47 | |
| 48 Chad Matsalla | |
| 49 bioinformatics@dieselwurks.com | |
| 50 | |
| 51 =head1 CONTRIBUTORS | |
| 52 | |
| 53 Jason Stajich, jason@bioperl.org | |
| 54 | |
| 55 =head1 APPENDIX | |
| 56 | |
| 57 The rest of the documentation details each of the object | |
| 58 methods. Internal methods are usually preceded with a _ | |
| 59 | |
| 60 =cut | |
| 61 | |
| 62 # 'Let the code begin... | |
| 63 | |
| 64 package Bio::SeqIO::phd; | |
| 65 use vars qw(@ISA); | |
| 66 use strict; | |
| 67 use Bio::SeqIO; | |
| 68 use Bio::Seq::SeqFactory; | |
| 69 | |
| 70 @ISA = qw(Bio::SeqIO); | |
| 71 | |
| 72 sub _initialize { | |
| 73 my($self,@args) = @_; | |
| 74 $self->SUPER::_initialize(@args); | |
| 75 if( ! defined $self->sequence_factory ) { | |
| 76 $self->sequence_factory(new Bio::Seq::SeqFactory | |
| 77 (-verbose => $self->verbose(), | |
| 78 -type => 'Bio::Seq::SeqWithQuality')); | |
| 79 } | |
| 80 } | |
| 81 | |
| 82 =head2 next_seq() | |
| 83 | |
| 84 Title : next_seq() | |
| 85 Usage : $swq = $stream->next_seq() | |
| 86 Function: returns the next phred sequence in the stream | |
| 87 Returns : Bio::Seq::SeqWithQuality object | |
| 88 Args : NONE | |
| 89 Notes : This is really redundant because AFAIK there is no such thing as | |
| 90 a .phd file that contains more then one sequence. It is included as | |
| 91 an interface thing and for consistency. | |
| 92 | |
| 93 =cut | |
| 94 | |
| 95 sub next_seq { | |
| 96 my ($self,@args) = @_; | |
| 97 my ($entry,$done,$qual,$seq); | |
| 98 my ($id,@lines, @bases, @qualities) = (''); | |
| 99 if (!($entry = $self->_readline)) { return; } | |
| 100 if ($entry =~ /^BEGIN_SEQUENCE\s+(\S+)/) { | |
| 101 $id = $1; | |
| 102 } | |
| 103 my $in_dna = 0; | |
| 104 my $base_number = 0; | |
| 105 while ($entry = $self->_readline) { | |
| 106 return if (!$entry); | |
| 107 chomp($entry); | |
| 108 if ($entry =~ /^BEGIN_CHROMAT:\s+(\S+)/) { | |
| 109 # this is where I used to grab the ID | |
| 110 if (!$id) { | |
| 111 $id = $1; | |
| 112 } | |
| 113 $entry = $self->_readline(); | |
| 114 } | |
| 115 if ($entry =~ /^BEGIN_DNA/) { | |
| 116 $entry =~ /^BEGIN_DNA/; | |
| 117 $in_dna = 1; | |
| 118 $entry = $self->_readline(); | |
| 119 } | |
| 120 if ($entry =~ /^END_DNA/) { | |
| 121 $in_dna = 0; | |
| 122 } | |
| 123 if ($entry =~ /^END_SEQUENCE/) { | |
| 124 } | |
| 125 if (!$in_dna) { next; } | |
| 126 $entry =~ /(\S+)\s+(\S+)/; | |
| 127 push @bases,$1; | |
| 128 push @qualities,$2; | |
| 129 push(@lines,$entry); | |
| 130 } | |
| 131 # $self->debug("csmCreating objects with id = $id\n"); | |
| 132 my $swq = $self->sequence_factory->create | |
| 133 (-seq => join('',@bases), | |
| 134 -qual => \@qualities, | |
| 135 -id => $id, | |
| 136 -primary_id => $id, | |
| 137 -display_id => $id, | |
| 138 ); | |
| 139 return $swq; | |
| 140 } | |
| 141 | |
| 142 =head2 write_seq | |
| 143 | |
| 144 Title : write_seq(-SeqWithQuality => $swq, <comments>) | |
| 145 Usage : $obj->write_seq( -SeqWithQuality => $swq,); | |
| 146 Function: Write out an scf. | |
| 147 Returns : Nothing. | |
| 148 Args : Requires: a reference to a SeqWithQuality object to form the | |
| 149 basis for the scf. Any other arguments are assumed to be comments | |
| 150 and are put into the comments section of the scf. Read the | |
| 151 specifications for scf to decide what might be good to put in here. | |
| 152 Notes : These are the comments that reside in the header of a phd file | |
| 153 at the present time. If not provided in the parameter list for | |
| 154 write_phd(), the following default values will be used: | |
| 155 CHROMAT_FILE: $swq->id() | |
| 156 ABI_THUMBPRINT: 0 | |
| 157 PHRED_VERSION: 0.980904.e | |
| 158 CALL_METHOD: phred | |
| 159 QUALITY_LEVELS: 99 | |
| 160 TIME: <current time> | |
| 161 TRACE_ARRAY_MIN_INDEX: 0 | |
| 162 TRACE_ARRAY_MAX_INDEX: unknown | |
| 163 CHEM: unknown | |
| 164 DYE: unknown | |
| 165 IMPORTANT: This method does not write the trace index where this | |
| 166 call was made. All base calls are placed at index 1. | |
| 167 | |
| 168 | |
| 169 =cut | |
| 170 | |
| 171 sub write_seq { | |
| 172 my ($self,@args) = @_; | |
| 173 my @phredstack; | |
| 174 my ($label,$arg); | |
| 175 | |
| 176 my ($swq, $chromatfile, $abithumb, | |
| 177 $phredversion, $callmethod, | |
| 178 $qualitylevels,$time, | |
| 179 $trace_min_index, | |
| 180 $trace_max_index, | |
| 181 $chem, $dye | |
| 182 ) = $self->_rearrange([qw(SEQWITHQUALITY | |
| 183 CHROMAT_FILE | |
| 184 ABI_THUMBPRINT | |
| 185 PHRED_VERSION | |
| 186 CALL_METHOD | |
| 187 QUALITY_LEVELS | |
| 188 TIME | |
| 189 TRACE_ARRAY_MIN_INDEX | |
| 190 TRACE_ARRAY_MAX_INDEX | |
| 191 CHEM | |
| 192 DYE | |
| 193 )], @args); | |
| 194 | |
| 195 unless (ref($swq) eq "Bio::Seq::SeqWithQuality") { | |
| 196 $self->throw("You must pass a Bio::Seq::SeqWithQuality object to write_scf as a parameter named \"SeqWithQuality\""); | |
| 197 } | |
| 198 my $id = $swq->id(); | |
| 199 if (!$id) { $id = "UNDEFINED in SeqWithQuality Object"; } | |
| 200 push @phredstack,("BEGIN_SEQUENCE $id","","BEGIN_COMMENT",""); | |
| 201 | |
| 202 $chromatfile = 'undefined in write_phd' unless defined $chromatfile; | |
| 203 push @phredstack,"CHROMAT_FILE: $chromatfile"; | |
| 204 | |
| 205 $abithumb = 0 unless defined $abithumb; | |
| 206 push @phredstack,"ABI_THUMBPRINT: $abithumb"; | |
| 207 | |
| 208 $phredversion = "0.980904.e" unless defined $phredversion; | |
| 209 push @phredstack,"PHRED_VERSION: $phredversion"; | |
| 210 | |
| 211 $callmethod = 'phred' unless defined $callmethod; | |
| 212 push @phredstack,"CALL_METHOD: $callmethod"; | |
| 213 | |
| 214 $qualitylevels = 99 unless defined $qualitylevels; | |
| 215 push @phredstack,"QUALITY_LEVELS: $qualitylevels"; | |
| 216 | |
| 217 $time = localtime() unless defined $time; | |
| 218 push @phredstack,"TIME: $time"; | |
| 219 | |
| 220 $trace_min_index = 0 unless defined $trace_min_index; | |
| 221 push @phredstack,"TRACE_ARRAY_MIN_INDEX: $trace_min_index"; | |
| 222 | |
| 223 $trace_max_index = '10000' unless defined $trace_max_index; | |
| 224 push @phredstack,"TRACE_ARRAY_MAX_INDEX: $trace_max_index"; | |
| 225 | |
| 226 $chem = 'unknown' unless defined $chem; | |
| 227 push @phredstack,"CHEM: $chem"; | |
| 228 | |
| 229 $dye = 'unknown' unless defined $dye; | |
| 230 push @phredstack, "DYE: $dye"; | |
| 231 | |
| 232 push @phredstack,("END_COMMENT","","BEGIN_DNA"); | |
| 233 | |
| 234 foreach (@phredstack) { $self->_print($_."\n"); } | |
| 235 | |
| 236 my $length = $swq->length(); | |
| 237 if ($length eq "DIFFERENT") { | |
| 238 $self->throw("Can't create the phd because the sequence and the quality in the SeqWithQuality object are of different lengths."); | |
| 239 } | |
| 240 for (my $curr = 1; $curr<=$length; $curr++) { | |
| 241 $self->_print (uc($swq->baseat($curr))." ". | |
| 242 $swq->qualat($curr)." 10". | |
| 243 "\n"); | |
| 244 } | |
| 245 $self->_print ("END_DNA\n\nEND_SEQUENCE\n"); | |
| 246 | |
| 247 $self->flush if $self->_flush_on_write && defined $self->_fh; | |
| 248 return 1; | |
| 249 } | |
| 250 | |
| 251 1; | |
| 252 __END__ |
