annotate variant_effect_predictor/Bio/Assembly/IO/phrap.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: phrap.pm,v 1.1 2002/11/04 14:38:14 heikki Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl driver for phrap.out file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Copyright by Robson F. de Souza
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Bio::Assembly::IO::phrap - driver to load phrap.out files.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 # Building an input stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 use Bio::Assembly::IO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 # Assembly loading methods
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 $io = new Bio::Assembly::IO(-file=>"SGC0-424.phrap.out",
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 -format=>"phrap");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 $assembly = $io->next_assembly;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 This package was developed to load the phrap.out files from the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 (phred/phrap/consed) package by Phil Green. These files contain just
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 the messages printed to standard out by phrap when building an
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 assembly. This output is redirected by phredPhrap perl-script to a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 file in the project's directory and holds some information
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 regarding assembly quality, connections between contigs and clone's
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 position inside contigs. It should be noted that such files have no
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 data about the sequence, neither for the contig consensus nor for any
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 aligned sequence. Anyway, such information may be loaded from Fasta
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 files in the projects directory and added to the assembly object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 later.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 Note that, because no sequence is loaded for the contig consensus and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 locations for aligned sequences are only given in "ungapped consensus"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 coordinates in a phrap.out file, you can't make coordinate changes in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 assemblies loaded by phrap.pm, unless you add aligned
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 coordinates for each sequence to each contig's features collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 yourself. See L<Bio::Assembly::Contig::Coordinate_Systems> and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 L<Bio::Assembly::Contig::Feature_collection>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 This driver also loads singlets into the assembly contigs as Bio::Seq
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 objects, although without their sequence strings. It also adds a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 feature for the entire sequence, thus storing the singlet length in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 its end position, and adds a tag '_nof_trimmed_nonX' to the feature,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 which stores the number of non-vector bases in the singlet.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 =head2 Implementation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 Assemblies are loaded into Bio::Assembly::Scaffold objects composed by
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 Bio::Assembly::Contig objects. No features are added to Bio::Assembly::Contig
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 "_aligned_coord:$seqID" feature class, therefore you can't make
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 coordinate changes in contigs loaded by this module. Contig objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 created by this module will have the following special feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 classes, identified by their primary tags, in their features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 collection:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 "_main_contig_feature:$ID" : main feature for contig $ID. This
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 feature is used to store information
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 about the entire consensus
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 sequence. This feature always start at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 base 1 and its end position is the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 consensus sequence length. A tag,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 '_trimmed_length', holds the length of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 trimmed good quality region inside the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 consensus sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 "_covered_region:$index" : coordinates for valid clones inside the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 contig. $index is the covered region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 number, starting at 1 for the covered
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 region closest to the consensus sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 first base.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 "_unalign_coord:$seqID" : location of a sequence in "ungapped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 consensus" coordinates (consensus
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 sequence without gaps). Primary and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 secondary scores, indel and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 substitutions statistics are stored as
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 feature tags.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 "_internal_clone:$cloneID" : clones inside contigs. $cloneID should be
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 used as the unique id for each
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 clone. These features have six tags:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 '_1st_name', which is the id of the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 upstream (5') aligned sequence
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 delimiting the clone; '_1st_strand', the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 upstream sequence strand in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 alignment; '_2nd_name', downstream (3')
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 sequence id; '_2nd_strand', the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 downstream sequence strand in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 alignment; '_length', unaligned clone
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 length; '_rejected', a boolean flag,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 which is false if the clone is valid and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 true if it was rejected.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 All coordinates for the features above are expressed as "ungapped
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 consensus" coordinates (See L<Bio::Assembly::Contig::Coordinate_Systems>).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 =head2 Feature collection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 Bioperl modules. Send your comments and suggestions preferably to the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 Bioperl mailing lists. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 http://bio.perl.org/MailList.html - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 of the bugs and their resolution. Bug reports can be submitted via email
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 bioperl-bugs@bio.perl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 =head1 AUTHOR - Robson Francisco de Souza
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 Email rfsouza@citri.iq.usp.br
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 The rest of the documentation details each of the object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 methods. Internal methods are usually preceded with a _
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 package Bio::Assembly::IO::phrap;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 use Bio::Assembly::IO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 use Bio::Assembly::Scaffold;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 use Bio::Assembly::Contig;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 use Bio::LocatableSeq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 use Bio::Seq;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 use Bio::SeqFeature::Generic;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 @ISA = qw(Bio::Assembly::IO);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 =head2 next_assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 Title : next_assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 Usage : $unigene = $stream->next_assembly()
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 Function: returns the next assembly in the stream
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 Returns : Bio::Assembly::Scaffold object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 Args : NONE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# next_assembly
#
# Purpose : Parse the remainder of a phrap.out stream into one assembly.
# Usage   : my $assembly = $stream->next_assembly();
# Args    : none (lines are pulled from $self->_readline)
# Returns : a Bio::Assembly::Scaffold object; it is returned even when no
#           record was recognised (the scaffold is then simply empty).
#
# Records recognised, one line at a time:
#   * "N isolated singletons"  - following lines up to the first empty line
#                                become Bio::Seq singlets carrying a feature
#                                1..<singlet length> tagged '_nof_trimmed_nonX'.
#   * "Contig N. ..."          - starts a new Bio::Assembly::Contig with a
#                                "_main_contig_feature:N" feature.
#   * read lines               - added to the most recent contig together
#                                with an "_unalign_coord:<read>" feature.
#   * "INTERNAL Contig N ..."  - clone lines and the closing
#                                "Covered regions:" line become
#                                "_internal_clone:<clone>" and
#                                "_covered_region:<i>" features of contig N.
sub next_assembly {
    my $self = shift;    # stream object providing _readline

    # A fresh scaffold is built on every call.
    my $Assembly = Bio::Assembly::Scaffold->new(-source=>'phrap');

    # Contig that subsequent read lines belong to (set by a "Contig" header).
    my $contigOBJ;

    # A lexical line variable plus defined() keeps the caller's $_ intact and
    # does not stop early on a false-valued line such as a trailing "0".
    LINE:
    while (defined(my $line = $self->_readline)) {
        chomp $line;

        # Loading exact duplicated reads list (never implemented; kept for
        # reference only):
        #   /Exact duplicate reads:/ && do {
        #       my @exact_dupl;
        #       while (<FILE>) {
        #           last if (/^\s*$/);
        #           /(\S+)\s+(\S+)/ && do {
        #               push(@exact_dupl,[$1,$2]);
        #           };
        #           $self->{'assembly'}{'exact_dupl_reads'} =
        #               new Data::Table(\@exact_dupl,['included','excluded'],0);
        #       }
        #   };

        # Loading singlets reads data
        if ($line =~ /^(\d+) isolated singletons/) {
            SINGLET:
            while (defined(my $entry = $self->_readline)) {
                chomp $entry;
                last SINGLET if $entry =~ /^$/;    # empty line ends the list
                next SINGLET
                    unless $entry =~ /^\s+(\S+)\s+(\d+)\s+\((\d+)\)/;

                my ($seqID, $length, $nof_trimmed_nonX) = ($1, $2, $3);
                my $seq = Bio::Seq->new(-strand=>1,
                                        -primary_id=>$seqID);

                # No sequence string is available in phrap.out, so the
                # sequence object cannot report its own length; the length
                # parsed from the file is stored as the feature end instead.
                my $f = Bio::SeqFeature::Generic->new(
                    -start   => 1,
                    -end     => $length,
                    -primary => $seq->primary_id(),
                    -tag     => { '_nof_trimmed_nonX' => $nof_trimmed_nonX },
                );
                $seq->add_SeqFeature($f);
                $Assembly->add_singlet($seq);
            }
            next LINE;
        }

        # Loading contig information
        if ($line =~ /^Contig (\d+)\.\s+(\d+) reads?; (\d+) bp \(untrimmed\), (\d+) \(trimmed\)\./) {
            my ($contigID, $length, $trimmed_length) = ($1, $3, $4);
            $contigOBJ = Bio::Assembly::Contig->new(-id=>$contigID, -source=>'phrap');
            my $feat = Bio::SeqFeature::Generic->new(
                -start   => 1,
                -end     => $length,
                -primary => "_main_contig_feature:".$contigOBJ->id(),
                -tag     => { '_trimmed_length' => $trimmed_length },
            );
            # Second argument 1 is passed through to add_features unchanged.
            $contigOBJ->add_features([ $feat ],1);
            $Assembly->add_contig($contigOBJ);
            next LINE;
        }

        # Loading read information
        if ($line =~ /^(C?)\s+(-?\d+)\s+(\d+)\s+(\S+)\s+(\d+)\s+\(\s*(\d+)\)\s+(\d+\.\d*)\s+(\d+\.\d*)\s+(\d+\.\d*)/) {
            my $strand = ($1 eq 'C' ? -1 : 1);
            my ($start, $end, $readID)              = ($2, $3, $4);
            my ($primary_score, $secondary_score)   = ($5, $6);
            my ($substitutions, $deletions, $insertions) = ($7, $8, $9);

            # A read-shaped line seen before any "Contig" header has no
            # contig to attach to; skip it instead of dying on undef.
            next LINE unless defined $contigOBJ;

            my $seq = Bio::LocatableSeq->new(
                -start      => $start,
                -end        => $end,
                -strand     => $strand,
                -id         => $readID,
                -primary_id => $readID,
                -alphabet   => 'dna',
            );
            my $unalign_coord = Bio::SeqFeature::Generic->new(
                -start   => $start,
                -end     => $end,
                -primary => "_unalign_coord:$readID",
                -tag     => {
                    '_primary_score'   => $primary_score,
                    '_secondary_score' => $secondary_score,
                    '_substitutions'   => $substitutions,
                    '_insertions'      => $insertions,
                    '_deletions'       => $deletions,
                },
            );
            $unalign_coord->attach_seq($seq);
            $contigOBJ->add_seq($seq);
            $contigOBJ->add_features([ $unalign_coord ]);
            next LINE;
        }

        # Loading INTERNAL clones description
        if ($line =~ /INTERNAL\s+Contig\s+(\d+)\s+opp\s+sense/) {
            my $contigID = $1;
            # May be undef when the section refers to a contig that was never
            # declared; the section is still consumed, but nothing is stored.
            my $contig = $Assembly->get_contig_by_id($contigID);

            CLONE:
            while (defined(my $entry = $self->_readline)) {

                if (my @data = $entry =~ /\s+(\*?)\s+(C?)\s+(\S+)\s+(C?)\s+(\S+)\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)/) {
                    my $rejected  = ($data[0] eq '*' ? 1 : 0);
                    my $c1_strand = ($data[1] eq 'C' ? -1 : 1);
                    my $c2_strand = ($data[3] eq 'C' ? -1 : 1);
                    # Clone name = first read name minus its ".suffix".
                    (my $clone_name = $data[2]) =~ s/^(\S+)\.\w.*/$1/;
                    my $clone = Bio::SeqFeature::Generic->new(
                        -start   => $data[6],
                        -end     => $data[7],
                        -strand  => 0,
                        -primary => "_internal_clone:$clone_name",
                        -tag     => {
                            '_1st_strand' => $c1_strand,
                            '_2nd_strand' => $c2_strand,
                            '_1st_name'   => $data[2],
                            '_2nd_name'   => $data[4],
                            '_length'     => $data[5],
                            '_rejected'   => $rejected,
                        },
                    );
                    $contig->add_features([ $clone ]) if defined $contig;
                }

                if ($entry =~ /Covered regions:/) {
                    # Numbers on this line are consumed pairwise as
                    # start => end.
                    my %coord = $entry =~ /(\d+)/g;
                    my $i = 0;
                    foreach my $start (sort { $a <=> $b } keys %coord) {
                        my $cov = Bio::SeqFeature::Generic->new(
                            -start   => $start,
                            -end     => $coord{$start},
                            -primary => '_covered_region:'.++$i,
                        );
                        # 1: attach feature to contig consensus, if any
                        $contig->add_features([ $cov ],1) if defined $contig;
                    }
                    last CLONE;    # "Covered regions:" closes the section
                }
            }
            next LINE;
        }
    }

    return $Assembly;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295 =head2 write_assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 Title : write_assembly
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 Usage : $ass_io->write_assembly($assembly)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 Function: Write the assembly object in Phrap compatible ACE format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 Returns : 1 on success, 0 for error
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 Args : A Bio::Assembly::Scaffold object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# write_assembly
#
# Purpose : Placeholder for writing an assembly; not implemented.
# Usage   : $ass_io->write_assembly($assembly)
# Args    : ignored
# Returns : never returns normally; always calls $self->throw with a
#           "not implemented" message.
sub write_assembly {
    my $self = shift;

    $self->throw("Writing phrap.out files is not implemented yet! Sorry...");
}

# The sub was originally defined under the misspelled name
# "write_assemebly". That spelling is kept as an alias so existing callers
# keep working.
{
    no warnings 'once';
    *write_assemebly = \&write_assembly;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 __END__