annotate variant_effect_predictor/Bio/Assembly/IO/ace.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: ace.pm,v 1.1 2002/11/04 14:38:14 heikki Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 ## BioPerl module for Bio::Assembly::IO::ace
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Copyright by Robson F. de Souza
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Bio::Assembly::IO::ace - module to load phrap ACE files.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 =head1 SYNOPSYS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 # Building an input stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 use Bio::Assembly::IO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 # Assembly loading methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 $io = new Bio::Assembly::IO(-file=>"SGC0-424.fasta.screen.ace.1",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 -format=>"ace");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 $assembly = $io->next_assembly;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 This package loads the ACE files from the (phred/phrap/consed) package
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 by Phill Green. It was written to be used as a driver module for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 Bio::Assembly::IO input/output.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 =head2 Implemention
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 Assemblies are loaded into Bio::Assembly::Scaffold objects composed by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 Bio::Assembly::Contig objects. In addition to default
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 "_aligned_coord:$seqID" feature class from Bio::Assembly::Contig, contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 objects loaded by this module will have the following special feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 classes in their feature collection:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 "_align_clipping:$seqID" : location of subsequence in sequence $seqID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 which is aligned to the contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 "_quality_clipping:$seqID" : location of good quality subsequence for
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 sequence $seqID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 "consensus tags", as they are called in Consed's documentation, is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 equivalent to a bioperl sequence feature and, therefore, are added to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 the feature collection using their type field (see Consed's README.txt
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 file) as primary tag.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 "read tags" are also sequence features and are stored as
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 sub_SeqFeatures of the sequence's coordinate feature (the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 corresponding "_aligned_coord:$seqID" feature, easily accessed through
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 get_seq_coord() method).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 "whole assembly tags" have no start and end, as they are not
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 associated to any particular sequence in the assembly, and are added
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 to the assembly's annotation collection using phrap as tag.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 Bioperl modules. Send your comments and suggestions preferably to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 Bioperl mailing lists Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 http://bio.perl.org/MailList.html - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 =head1 AUTHOR - Robson Francisco de Souza
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 Email rfsouza@citri.iq.usp.br
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 The rest of the documentation details each of the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 methods. Internal methods are usually preceded with a _
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 package Bio::Assembly::IO::ace;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 use Bio::Assembly::IO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 use Bio::Assembly::Scaffold;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 use Bio::Assembly::Contig;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 use Bio::LocatableSeq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 use Bio::Annotation::SimpleValue;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 use Bio::Seq::PrimaryQual;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 use Bio::SeqFeature::Generic;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 @ISA = qw(Bio::Assembly::IO);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 =head1 Parser methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 =head2 next_assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 Title : next_assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 Usage : $unigene = $stream->next_assembly()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 Function: returns the next assembly in the stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 Returns : Bio::Assembly::Scaffold object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 Args : NONE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 sub next_assembly {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 my $self = shift; # Object reference
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 local $/="\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 # Resetting assembly data structure
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 my $assembly = Bio::Assembly::Scaffold->new(-source=>'phrap');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 # Looping over all ACE file lines
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 my ($contigOBJ,$read_name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 my $read_data = {}; # Temporary holder for read data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 while ($_ = $self->_readline) { # By now, ACE files hold a single assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 # Loading assembly information (ASsembly field)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 # (/^AS (\d+) (\d+)/) && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 # $assembly->_set_nof_contigs($1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 # $assembly->_set_nof_sequences_in_contigs($2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 # };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 # Loading contig sequence (COntig sequence field)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 (/^CO Contig(\d+) (\d+) (\d+) (\d+) (\w+)/) && do { # New contig found!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 my $contigID = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 $contigOBJ = Bio::Assembly::Contig->new(-source=>'phrap', -id=>$contigID);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 # $contigOBJ->set_nof_bases($2); # Contig length in base pairs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 # $contigOBJ->set_nof_reads($3); # Number of reads in this contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 # $contigOBJ->set_nof_segments($4); # Number of read segments selected for consensus assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 my $ori = ($5 eq 'U' ? 1 : -1); # 'C' if contig was complemented (using consed) or U if not (default)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 $contigOBJ->strand($ori);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 my $consensus_sequence = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 while ($_ = $self->_readline) { # Looping over contig lines
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 chomp; # Drop <ENTER> (\n) on current line
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 last if (/^$/); # Stop if empty line (contig end) is found
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 s/\*/-/g; # Forcing '-' as gap symbol
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 $consensus_sequence .= $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 my $consensus_length = length($consensus_sequence);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 $consensus_sequence = Bio::LocatableSeq->new(-seq=>$consensus_sequence,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 -start=>1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 -end=>$consensus_length,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 -id=>$contigID);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 $contigOBJ->set_consensus_sequence($consensus_sequence);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 $assembly->add_contig($contigOBJ);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 # Loading contig qualities... (Base Quality field)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 /^BQ/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 my $consensus = $contigOBJ->get_consensus_sequence()->seq();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 my ($i,$j,@tmp);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 my @quality = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 $j = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 while ($_ = $self->_readline) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 last if (/^$/);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 @tmp = grep { /^\d+$/ } split(/\s+/);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 $i = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 my $previous = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 my $next = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 while ($i<=$#tmp) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 # IF base is a gap, quality is the average for neighbouring sites
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 if (substr($consensus,$j,1) eq '-') {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 $previous = $tmp[$i-1] unless ($i == 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 if ($i < $#tmp) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 $next = $tmp[$i+1];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 $next = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 push(@quality,int(($previous+$next)/2));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 push(@quality,$tmp[$i]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 $i++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 $j++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 my $qual = Bio::Seq::PrimaryQual->new(-qual=>join(" ",@quality),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 -id=>$contigOBJ->id());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 $contigOBJ->set_consensus_quality($qual);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 # Loading read info... (Assembled From field)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 /^AF (\S+) (C|U) (-*\d+)/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 $read_name = $1; my $ori = $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 $read_data->{$read_name}{'padded_start'} = $3; # aligned start
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 $ori = ( $ori eq 'U' ? 1 : -1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 $read_data->{$read_name}{'strand'} = $ori;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 # Loading base segments definitions (Base Segment field)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 # /^BS (\d+) (\d+) (\S+)/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 # if (exists($self->{'contigs'}[$contig]{'reads'}{$3}{'segments'})) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 # $self->{'contigs'}[$contig]{'reads'}{$3}{'segments'} .= " " . $1 . " " . $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 # } else { $self->{'contigs'}[$contig]{'reads'}{$3}{'segments'} = $1 . " " . $2 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 # };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 # Loading reads... (ReaD sequence field
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 /^RD (\S+) (-*\d+) (\d+) (\d+)/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 $read_name = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 $read_data->{$read_name}{'length'} = $2; # number_of_padded_bases
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 $read_data->{$read_name}{'contig'} = $contigOBJ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 # $read_data->{$read_name}{'number_of_read_info_items'} = $3;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 # $read_data->{$read_name}{'number_of_tags'} = $4;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 my $read_sequence;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 while ($_ = $self->_readline) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 last if (/^$/);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 s/\*/-/g; # Forcing '-' as gap symbol
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 $read_sequence .= $_; # aligned read sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 my $read = Bio::LocatableSeq->new(-seq=>$read_sequence,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 -start=>1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 -end=>$read_data->{$read_name}{'length'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 -strand=>$read_data->{$read_name}{'strand'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 -id=>$read_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 -primary_id=>$read_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 -alphabet=>'dna');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 # Adding read location and sequence to contig ("gapped consensus" coordinates)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 my $padded_start = $read_data->{$read_name}{'padded_start'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 my $padded_end = $padded_start + $read_data->{$read_name}{'length'} - 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 my $coord = Bio::SeqFeature::Generic->new(-start=>$padded_start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 -end=>$padded_end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 -strand=>$read_data->{$read_name}{'strand'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 -tag => { 'contig' => $contigOBJ->id }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 $contigOBJ->set_seq_coord($coord,$read);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 # Loading read trimming and alignment ranges...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 /^QA (-?\d+) (-?\d+) (-?\d+) (-?\d+)/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 my $qual_start = $1; my $qual_end = $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 my $align_start = $3; my $align_end = $4;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 unless ($align_start == -1 && $align_end == -1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 $align_start = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$align_start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 $align_end = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$align_end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 my $align_feat = Bio::SeqFeature::Generic->new(-start=>$align_start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 -end=>$align_end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 -strand=>$read_data->{$read_name}{'strand'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 -primary=>"_align_clipping:$read_name");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 $align_feat->attach_seq( $contigOBJ->get_seq_by_name($read_name) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 $contigOBJ->add_features([ $align_feat ], 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 unless ($qual_start == -1 && $qual_end == -1) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 $qual_start = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$qual_start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 $qual_end = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$qual_end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 my $qual_feat = Bio::SeqFeature::Generic->new(-start=>$qual_start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267 -end=>$qual_end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 -strand=>$read_data->{$read_name}{'strand'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269 -primary=>"_quality_clipping:$read_name");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 $qual_feat->attach_seq( $contigOBJ->get_seq_by_name($read_name) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 $contigOBJ->add_features([ $qual_feat ], 0);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 # Loading read description (DeScription fields)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 # /^DS / && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 # /CHEM: (\S+)/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 # $self->{'contigs'}[$contig]{'reads'}{$read_name}{'chemistry'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279 # };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 # /CHROMAT_FILE: (\S+)/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 # $self->{'contigs'}[$contig]{'reads'}{$read_name}{'chromat_file'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 # };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 # /DIRECTION: (\w+)/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 # my $ori = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 # if ($ori eq 'rev') { $ori = 'C' }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 # elsif ($ori eq 'fwd') { $ori = 'U' }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 # $self->{'contigs'}[$contig]{'reads'}{$read_name}{'strand'} = $ori;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 # };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 # /DYE: (\S+)/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 # $self->{'contigs'}[$contig]{'reads'}{$read_name}{'dye'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 # };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 # /PHD_FILE: (\S+)/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 # $self->{'contigs'}[$contig]{'reads'}{$read_name}{'phd_file'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 # };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 # /TEMPLATE: (\S+)/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 # $self->{'contigs'}[$contig]{'reads'}{$read_name}{'template'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 # };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 # /TIME: (\S+ \S+ \d+ \d+\:\d+\:\d+ \d+)/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 # $self->{'contigs'}[$contig]{'reads'}{$read_name}{'phd_time'} = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 # };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 # };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 # Loading contig tags ('tags' in phrap terminology, but Bioperl calls them features)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 /^CT\s*\{/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 my ($contigID,$type,$source,$start,$end,$date) = split(' ',$self->_readline);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 $contigID =~ s/^Contig//i;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 my $extra_info = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 while ($_ = $self->_readline) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 last if (/\}/);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 $extra_info .= $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 my $contig_tag = Bio::SeqFeature::Generic->new(-start=>$start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 -end=>$end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 -primary=>$type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 -tag=>{ 'source' => $source,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 'creation_date' => $date,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 'extra_info' => $extra_info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 $assembly->get_contig_by_id($contigID)->add_features([ $contig_tag ],1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 # Loading read tags
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 /^RT\s*\{/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 my ($readID,$type,$source,$start,$end,$date) = split(' ',$self->_readline);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 my $extra_info = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 while ($_ = $self->_readline) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 last if (/\}/);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 $extra_info .= $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 $start = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$start);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 $end = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$end);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 my $read_tag = Bio::SeqFeature::Generic->new(-start=>$start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 -end=>$end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 -primary=>$type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 -tag=>{ 'source' => $source,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 'creation_date' => $date,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 'extra_info' => $extra_info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 });
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 my $contig = $read_data->{$readID}{'contig'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 my $coord = $contig->get_seq_coord( $contig->get_seq_by_name($readID) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 $coord->add_sub_SeqFeature($read_tag);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 # Loading read tags
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 /^WA\s*\{/ && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 my ($type,$source,$date) = split(' ',$self->_readline);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 my $extra_info = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 while ($_ = $self->_readline) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 last if (/\}/);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 $extra_info = $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 #? push(@line,\@extra_info);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 my $assembly_tag = join(" ","TYPE: ",$type,"PROGRAM:",$source,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 "DATE:",$date,"DATA:",$extra_info);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 $assembly_tag = Bio::Annotation::SimpleValue->new(-value=>$assembly_tag);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 $assembly->annotation->add_Annotation('phrap',$assembly_tag);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 } # while ($_ = $self->_readline)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 $assembly->update_seq_list();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 return $assembly;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 =head2 write_assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 Title : write_assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 Usage : $ass_io->write_assembly($assembly)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 Function: Write the assembly object in Phrap compatible ACE format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 Returns : 1 on success, 0 for error
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 Args : A Bio::Assembly::Scaffold object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 sub write_assemebly {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 $self->throw("Writing phrap ACE files is not implemented yet! Sorry...");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 __END__