annotate variant_effect_predictor/Bio/ClusterIO/unigene.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: unigene.pm,v 1.16.2.2 2003/09/15 01:50:47 andrew Exp $
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2 # BioPerl module for Bio::ClusterIO::unigene
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4 # Cared for by Andrew Macgregor <andrew@anatomy.otago.ac.nz>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6 # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8 # http://meg.otago.ac.nz
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10 # You may distribute this module under the same terms as perl itself
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12 # _history
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13 # April 17, 2002 - Initial implementation by Andrew Macgregor
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15 # POD documentation - main docs before the code
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19 Bio::ClusterIO::unigene - UniGene input stream
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23 Do not use this module directly. Use it via the Bio::ClusterIO class.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25 =head1 DESCRIPTION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27 This object reads from UniGene *.data files downloaded from ftp://ftp.ncbi.nih.gov/repository/UniGene/.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28 It doesn't download or decompress the file; you have to do that yourself.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31 =head1 FEEDBACK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33 =head2 Mailing Lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35 User feedback is an integral part of the evolution of this and other
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36 Bioperl modules. Send your comments and suggestions preferably to one
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37 of the Bioperl mailing lists. Your participation is much appreciated.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39 bioperl-l@bioperl.org - General discussion
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40 http://bioperl.org/MailList.shtml - About the mailing lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42 =head2 Reporting Bugs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44 Report bugs to the Bioperl bug tracking system to help us keep track
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45 of the bugs and their resolution.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46 Bug reports can be submitted via email or the web:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48 bioperl-bugs@bio.perl.org
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49 http://bugzilla.bioperl.org/
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51 =head1 AUTHORS - Andrew Macgregor
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53 Email: andrew@anatomy.otago.ac.nz
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56 =head1 APPENDIX
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58 The rest of the documentation details each of the object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59 methods. Internal methods are usually preceded by an underscore (_).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
62
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
63 #'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
64 # Let the code begin...
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
65
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
66 package Bio::ClusterIO::unigene;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
67 use vars qw(@ISA);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
68 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
69
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
70 use Bio::ClusterIO;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
71 use Bio::Cluster::UniGene;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
72 use Bio::Cluster::ClusterFactory;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
73
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
74 @ISA = qw(Bio::ClusterIO);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
75
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# Regex source strings used by next_cluster, keyed by the UniGene field they
# recognise.  They are stored as plain strings and interpolated into a match
# at the point of use; capture group 1 holds the field's value (ACC also
# captures an optional version number in group 2).
my %line_is = (
    # Record-level fields: keyword, whitespace, then the value.
    ID           => 'ID\s+(\w{2,3}\.\d+)',
    TITLE        => 'TITLE\s+(\S.*)',
    GENE         => 'GENE\s+(\S.*)',
    CYTOBAND     => 'CYTOBAND\s+(\S.*)',
    MGI          => 'MGI\s+(\S.*)',
    LOCUSLINK    => 'LOCUSLINK\s+(\S.*)',
    EXPRESS      => 'EXPRESS\s+(\S.*)',
    GNM_TERMINUS => 'GNM_TERMINUS\s+(\S.*)',
    CHROMOSOME   => 'CHROMOSOME\s+(\S.*)',
    STS          => 'STS\s+(\S.*)',
    TXMAP        => 'TXMAP\s+(\S.*)',
    PROTSIM      => 'PROTSIM\s+(\S.*)',
    SCOUNT       => 'SCOUNT\s+(\S.*)',
    SEQUENCE     => 'SEQUENCE\s+(\S.*)',

    # KEY=value items found inside a SEQUENCE line.
    ACC          => 'ACC=(\w+)\.?(\d*)',
    NID          => 'NID=\s*(\S.*)',
    PID          => 'PID=\s*(\S.*)',
    CLONE        => 'CLONE=\s*(\S.*)',
    END          => 'END=\s*(\S.*)',
    LID          => 'LID=\s*(\S.*)',
    MGC          => 'MGC=\s*(\S.*)',
    SEQTYPE      => 'SEQTYPE=\s*(\S.*)',
    TRACE        => 'TRACE=\s*(\S.*)',

    # A line beginning with "//" closes a record.
    DELIMITER    => '^//',
);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
103 # we set the right factory here
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
sub _initialize {
    my $self      = shift;
    my @ctor_args = @_;

    # Let the parent class process the constructor arguments first.
    $self->SUPER::_initialize(@ctor_args);

    # If no cluster factory is set after that, install one that creates
    # Bio::Cluster::UniGene objects.
    my $current_factory = $self->cluster_factory();
    if (not $current_factory) {
        $self->cluster_factory(
            Bio::Cluster::ClusterFactory->new(-type => 'Bio::Cluster::UniGene')
        );
    }
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
113
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
114 =head2 next_cluster
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
115
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
116 Title : next_cluster
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
117 Usage : $unigene = $stream->next_cluster()
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
118 Function: returns the next unigene in the stream
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
119 Returns : Bio::Cluster::UniGene object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
120 Args : NONE
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
121
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
122 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
123
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# next_cluster: read one UniGene record from the stream and build a cluster
# object from it.
#
# Args    : none
# Returns : the object produced by $self->cluster_factory->create_object for
#           the record.  Returns nothing (empty list / undef) when _readline
#           yields no data, and undef when the chunk that was read contains
#           no line starting with "//".
sub next_cluster {
    my ($self) = @_;

    # A record ends with a line that starts with "//".  Use "\n//" as the
    # input record separator instead of a bare "//", so that a "//" inside a
    # field value (for example a URL in a TITLE) does not cut the record
    # short and leave it without its terminator line.
    local $/ = "\n//";
    my $entry = $self->_readline;
    return unless $entry;

    # Per-record accumulators.
    my (%unigene, @express, @locuslink, @chromosome,
        @sts, @txmap, @protsim, @sequence);
    my $UGobj;

    # Classify each line of the record.  The patterns in %line_is are not
    # anchored, so the order of the tests below is significant.
    foreach my $line (split /\n/, $entry) {
        if ($line =~ /$line_is{ID}/) {
            $unigene{ID} = $1;
        }
        elsif ($line =~ /$line_is{TITLE}/) {
            $unigene{TITLE} = $1;
        }
        elsif ($line =~ /$line_is{GENE}/) {
            $unigene{GENE} = $1;
        }
        elsif ($line =~ /$line_is{CYTOBAND}/) {
            $unigene{CYTOBAND} = $1;
        }
        elsif ($line =~ /$line_is{MGI}/) {
            $unigene{MGI} = $1;
        }
        elsif ($line =~ /$line_is{LOCUSLINK}/) {
            @locuslink = split /;/, $1;
        }
        elsif ($line =~ /$line_is{EXPRESS}/) {
            my $express = $1;
            # remove initial semicolon if present
            $express =~ s/^;//;
            @express = split /\s*;/, $express;
        }
        elsif ($line =~ /$line_is{GNM_TERMINUS}/) {
            $unigene{GNM_TERMINUS} = $1;
        }
        elsif ($line =~ /$line_is{CHROMOSOME}/) {
            push @chromosome, $1;
        }
        elsif ($line =~ /$line_is{TXMAP}/) {
            push @txmap, $1;
        }
        elsif ($line =~ /$line_is{STS}/) {
            push @sts, $1;
        }
        elsif ($line =~ /$line_is{PROTSIM}/) {
            push @protsim, $1;
        }
        elsif ($line =~ /$line_is{SCOUNT}/) {
            $unigene{SCOUNT} = $1;
        }
        elsif ($line =~ /$line_is{SEQUENCE}/) {
            # Copy the capture before any further matching can clobber it.
            my $items_text = $1;
            push @sequence, _parse_sequence_items($items_text);
        }
        elsif ($line =~ /$line_is{DELIMITER}/) {
            # at the end of the record, add data to the object
            $UGobj = $self->cluster_factory->create_object(
                -display_id  => $unigene{ID},
                -description => $unigene{TITLE},
                -size        => $unigene{SCOUNT},
                -members     => \@sequence);
            $UGobj->gene($unigene{GENE})         if defined $unigene{GENE};
            $UGobj->cytoband($unigene{CYTOBAND}) if defined $unigene{CYTOBAND};
            $UGobj->mgi($unigene{MGI})           if defined $unigene{MGI};
            $UGobj->locuslink(\@locuslink);
            $UGobj->express(\@express);
            $UGobj->gnm_terminus($unigene{GNM_TERMINUS})
                if defined $unigene{GNM_TERMINUS};
            $UGobj->chromosome(\@chromosome);
            $UGobj->sts(\@sts);
            $UGobj->txmap(\@txmap);
            $UGobj->protsim(\@protsim);
        }
    }
    return $UGobj;
}

# _parse_sequence_items: split the value of a SEQUENCE line on ";" and
# collect the KEY=value items it recognises.
#
# Args    : the text captured after the SEQUENCE keyword
# Returns : a hash reference that may contain the keys acc, version, nid,
#           pid, clone, end, lid, mgc, seqtype and trace
sub _parse_sequence_items {
    my ($items_text) = @_;
    my %member;

    ITEM:
    foreach my $item (split /;/, $items_text) {
        if ($item =~ /$line_is{ACC}/) {
            $member{acc} = $1;
            # Group 2 is (\d*): it is always defined after a match and is
            # empty when the accession has no version, so test its length.
            $member{version} = $2 if defined $2 && length $2;
            next ITEM;
        }
        # Remaining items, tested in the same order as before; the hash key
        # is the lower-cased field name.
        foreach my $field (qw(NID PID CLONE END LID MGC SEQTYPE TRACE)) {
            if ($item =~ /$line_is{$field}/) {
                $member{ lc $field } = $1;
                next ITEM;
            }
        }
    }
    return \%member;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
250
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
251 1;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
252