annotate variant_effect_predictor/Bio/SeqIO/locuslink.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: locuslink.pm,v 1.2.2.2 2003/03/13 02:09:20 lapp Exp $
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl module for Bio::SeqIO::locuslink
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 # Cared for by Keith Ching <kching at gnf.org>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright Keith Ching
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 # (c) Keith Ching, kching at gnf.org, 2002.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 # You may distribute this module under the same terms as perl itself.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 # Refer to the Perl Artistic License (see the license accompanying this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 # software package, or see http://www.perl.com/language/misc/Artistic.html)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 # for the terms under which you may use, modify, and redistribute this module.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 # POD documentation - main docs before the code
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 Bio::SeqIO::locuslink - DESCRIPTION of Object
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 # don't instantiate directly - instead do
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 my $seqio = Bio::SeqIO->new(-format => "locuslink", -file => \STDIN);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 This module parses LocusLink into Bio::SeqI objects with rich
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 annotation, but no sequence.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 The input file has to be in the LL_tmpl format - the tabular format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 will not work.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 The way the current implementation populates the object is rather a
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 draft work than a finished work of art. Note that at this stage the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 locuslink entries cannot be round-tripped, because the parser loses
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 certain information. For instance, most of the alternative transcript
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 descriptions are not retained. The parser also misses any element
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 that deals with visual representation (e.g., 'button') except for the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 URLs. Almost all of the pieces of the annotation are kept in the
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 L<Bio::Annotation::Collection> object.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 =head1 FEEDBACK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 =head2 Mailing Lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 User feedback is an integral part of the evolution of this and other
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 Bioperl modules. Send your comments and suggestions preferably to
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 the Bioperl mailing list. Your participation is much appreciated.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 bioperl-l@bioperl.org - General discussion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 http://bioperl.org/MailList.shtml - About the mailing lists
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 =head2 Reporting Bugs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 Report bugs to the Bioperl bug tracking system to help us keep track
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 of the bugs and their resolution. Bug reports can be submitted via
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 email or the web:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 bioperl-bugs@bioperl.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 http://bugzilla.bioperl.org/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 =head1 AUTHOR - Keith Ching
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 Email kching at gnf.org
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 Describe contact details here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 =head1 CONTRIBUTORS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 Hilmar Lapp, hlapp at gmx.net
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 =head1 APPENDIX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 The rest of the documentation details each of the object methods.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 Internal methods are usually preceded with a _
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 package Bio::SeqIO::locuslink;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 use Bio::SeqIO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 use Bio::Seq::SeqFactory;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 use Bio::Species;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 use Bio::Annotation::DBLink;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 #use Bio::Annotation::Reference;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 use Bio::Annotation::Comment;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 use Bio::Annotation::SimpleValue;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 use Bio::Annotation::OntologyTerm;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 use Bio::Annotation::Collection;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 @ISA = qw(Bio::SeqIO);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 # list of all the field names in locuslink
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 my @locuslink_keys = qw(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 ACCNUM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 ALIAS_PROT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 ALIAS_SYMBOL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 ASSEMBLY
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 BUTTON
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 CDD
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 CHR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 COMP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 CONTIG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 CURRENT_LOCUSID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 DB_DESCR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 DB_LINK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 ECNUM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 EVID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 EXTANNOT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 GO
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 GRIF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 LINK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 LOCUSID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 LOCUS_CONFIRMED
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 LOCUS_TYPE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 MAP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 MAPLINK
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 NC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 NG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 NM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 NP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 NR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 OFFICIAL_GENE_NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 OFFICIAL_SYMBOL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 OMIM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 ORGANISM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 PHENOTYPE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 PHENOTYPE_ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 PMID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 PREFERRED_GENE_NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 PREFERRED_PRODUCT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 PREFERRED_SYMBOL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 PRODUCT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 PROT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 RELL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 STATUS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 STS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 SUMFUNC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 SUMMARY
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 TRANSVAR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 TYPE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 UNIGENE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 XG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 XM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 XP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 XR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 # list of fields to make simple annotations from
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 # fields not listed here or as a key in feature hash are ignored (lost).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 my %anntype_map = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 SimpleValue => [qw(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 ALIAS_PROT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 ALIAS_SYMBOL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 CDD
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 CHR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 CURRENT_LOCUSID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 ECNUM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 EXTANNOT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 MAP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 NC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 NR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 OFFICIAL_GENE_NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 OFFICIAL_SYMBOL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 PHENOTYPE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 PREFERRED_GENE_NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 PREFERRED_PRODUCT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 PREFERRED_SYMBOL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 PRODUCT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 RELL
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 SUMFUNC
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 ],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 Comment => [qw(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 SUMMARY
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 )
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 ],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195 # certain fields are not named the same as the symgene database list
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 my %dbname_map = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 pfam => 'Pfam',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 smart => 'SMART',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199 NM => 'RefSeq',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 NP => 'RefSeq',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 XP => 'RefSeq',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 XM => 'RefSeq',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 NG => 'RefSeq',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 XG => 'RefSeq',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 XR => 'RefSeq',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 PROT => 'GenBank',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207 ACCNUM => 'GenBank',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 CONTIG => 'GenBank',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 # certain fields are not named the same as the symgene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 # database list: rename the fields the symgene database name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 # key = field name in locuslink
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 # value = database name in sym
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 #GO => 'GO',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 OMIM => 'MIM',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 GRIF => 'GRIF',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 STS => 'STS',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 UNIGENE => 'UniGene',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 # certain CDD entries use the wrong prefix for the accession number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 # cddprefix will replace the key w/ the value for these entries
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 my %cddprefix = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 pfam => 'PF',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 smart => 'SM',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 # alternate mappings if one field does not exist
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 my %alternate_map = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 OFFICIAL_GENE_NAME => 'PREFERRED_GENE_NAME',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 OFFICIAL_SYMBOL => 'PREFERRED_SYMBOL',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 # for these field names, we only care about the first value X in value X|Y|Z
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 my @ll_firstelements = qw(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 NM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 NP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 NG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 XG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239 XM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240 XP
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 XR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242 PROT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 STS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 ACCNUM
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245 CONTIG
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246 GRIF
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 # these fields need to be flattened into a single string, using the given
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 # join string
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 my %flatten_tags = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 ASSEMBLY => '',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 ORGANISM => '',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 OFFICIAL_SYMBOL => '',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 OFFICIAL_GENE_NAME => '',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 LOCUSID => '',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 PMID => '',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 PREFERRED_SYMBOL => ', ',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 PREFERRED_GENE_NAME => ', '
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 # set the default search pattern for all the field names
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 my %feature_pat_map = map { ($_ , "^$_: (.+)\n"); } @locuslink_keys;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265 sub _initialize {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 my($self,@args) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 $self->SUPER::_initialize(@args);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 # overwrite the search pattern w/ the first value pattern
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 foreach my $key(@ll_firstelements){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 $feature_pat_map{$key}="^$key: ([^|]+)";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 # special search pattern for cdd entries
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 foreach my $key(keys %cddprefix) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 $feature_pat_map{$key}='^CDD: .+\|'.$key.'(\d+)';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280 # special patterns for specific fields
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 $feature_pat_map{MAP} = '^MAP: (.+?)\|';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282 $feature_pat_map{MAPHTML} = '^MAP: .+\|(<.+>)\|';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283 $feature_pat_map{GO} = '^GO: .+\|.+\|\w+\|(GO:\d+)\|';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 $feature_pat_map{GO_DESC} = '^GO: .+\|(.+)\|\w+\|GO:\d+\|';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 $feature_pat_map{GO_CAT} = '^GO: (.+)\|.+\|\w+\|GO:\d+\|';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 $feature_pat_map{EXTANNOT} = '^EXTANNOT: (.+)\|(.+)\|\w+\|.+\|\d+';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 # set the sequence factory of none has been set already
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 if(! $self->sequence_factory()) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 $self->sequence_factory(Bio::Seq::SeqFactory->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 -type => 'Bio::Seq::RichSeq'));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296 #########################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 sub search_pattern{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 #########################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301 my ($self,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 $entry, #text to search
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 $searchconfirm, #to make sure you got the right thing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 $searchpattern,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 $searchtype) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306 my @query = $entry=~/$searchpattern/gm;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 if ($searchconfirm ne "FALSE"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308 $self->warn("No $searchtype found\n$entry\n") unless @query;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 foreach (@query){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 if (!($_=~/$searchconfirm/)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311 $self->throw("error\n$entry\n$searchtype parse $_ does not match $searchconfirm\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313 }#endforeach
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 }#endsearchconfirm
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 return(@query);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 }#endsub
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 ############
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 sub read_species{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 ############
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 my ($spline)=@_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 my $species;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 my $genus;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 ($genus,$species)=$spline=~/([^ ]+) ([^ ]+)/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 my $make = Bio::Species->new();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 $make->classification( ($species,$genus) );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328 return $make;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 sub read_dblink{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 my ($ann,$db,$ref)=@_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 my @results=$ref ? @$ref : ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 foreach my $id(@results){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 if($id){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 $ann->add_Annotation('dblink',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 Bio::Annotation::DBLink->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 -database =>$db ,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 -primary_id =>$id));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 return($ann);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 sub read_reference{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 my ($ann,$db,$results)=@_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 if($results){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 chomp($results);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 my @ids=split(/,/,$results);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 $ann = read_dblink($ann,$db,\@ids) if @ids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 return $ann;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 }#endsub
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 sub add_annotation{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 my ($ac,$type,$text,$anntype)=@_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 my @args;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 $anntype = 'SimpleValue' unless $anntype;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 SWITCH : {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 $anntype eq 'SimpleValue' && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 push(@args, -value => $text, -tagname => $type);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 last SWITCH;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 $anntype eq 'Comment' && do {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 push(@args, -text => $text, -tagname => 'comment');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 last SWITCH;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382 $ac->add_Annotation("Bio::Annotation::$anntype"->new(@args));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 return($ac);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 }#endsub
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388 sub add_annotation_ref{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 my ($ann,$type,$textref)=@_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 my @text=$textref ? @$textref : ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 foreach my $text(@text){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 $ann->add_Annotation($type,Bio::Annotation::SimpleValue->new(-value => $text));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 return($ann);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 }#endsub
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402 sub make_unique{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 ##############
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 my ($ann,$key) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 my %seen = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 foreach my $dbl ($ann->remove_Annotations($key)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 if(! $seen{$dbl->as_text()}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 $seen{$dbl->as_text()} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 $ann->add_Annotation($dbl);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 return $ann;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 sub next_seq{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 ##############
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 my ($self, @args)=@_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 my (@results,$search,$ref,$cddref);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 # LOCUSLINK entries begin w/ >>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 local $/="\n>>";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 # slurp in a whole entry and return if no more entries
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 return unless my $entry = $self->_readline;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 # strip the leading '>>' is it's the first entry
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 if (index($entry,'>>') == 0) { #first entry
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 $entry = substr($entry,2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 # we aren't interested in obsoleted entries, so we need to loop
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 # and skip those until we've found the next not obsoleted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 my %record = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 while($entry && ($entry =~ /\w/)) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 if (!($entry=~/LOCUSID/)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 $self->throw("No LOCUSID in first line of record. ".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 "Not LocusLink in my book.");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 # see whether it's an obsoleted entry, and if so jump to the next
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 # one entry right away
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 if($entry =~ /^CURRENT_LOCUSID:/m) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 # read next entry and continue
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 $entry = $self->_readline;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 %record = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 # loop through list of features and get field values
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 # place into record hash as array refs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 foreach my $key (keys %feature_pat_map){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 $search=$feature_pat_map{$key};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 @results=$self->search_pattern($entry,'FALSE',$search,$search);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 $record{$key} = @results ? [@results] : undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 }#endfor
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 # terminate loop as this one hasn't been obsoleted
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 # we have reached the end-of-file ...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 return unless %record;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 # special processing for CDD entries like pfam and smart
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 my ($PRESENT,@keep);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 foreach my $key(keys %cddprefix){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 #print "check CDD $key\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 if($record{$key}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 @keep=();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 foreach my $list (@{$record{$key}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 # replace AC with correct AC number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474 push(@keep,$cddprefix{$key}.$list);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 # replace CDD ref with correctly prefixed AC number
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477 $record{$key} = [@keep];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480 # modify CDD references @=();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 if($record{CDD}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482 @keep=();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 foreach my $cdd (@{$record{CDD}}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 $PRESENT = undef;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 foreach my $key (keys %cddprefix) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486 if ($cdd=~/$key/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 $PRESENT = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 push(@keep,$cdd) if(! $PRESENT);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 $record{CDD} = [@keep];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496 # create annotation collection - we'll need it now
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497 my $ann = Bio::Annotation::Collection->new();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 foreach my $field(keys %dbname_map){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500 $ann=read_dblink($ann,$dbname_map{$field},$record{$field});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 # add GO link as an OntologyTerm annotation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504 if($record{GO}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 for(my $j = 0; $j < @{$record{GO}}; $j++) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 my $goann = Bio::Annotation::OntologyTerm->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 -identifier => $record{GO}->[$j],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 -name => $record{GO_DESC}->[$j],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509 -ontology => $record{GO_CAT}->[$j]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 $ann->add_Annotation($goann);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514 $ann=add_annotation_ref($ann,'URL',$record{LINK});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 $ann=add_annotation_ref($ann,'URL',$record{DB_LINK});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 # presently we can't store types of dblinks - hence make unique
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 make_unique($ann,'dblink');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 # everything else gets a simple tag or comment value annotation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 foreach my $anntype (keys %anntype_map) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522 foreach my $key (@{$anntype_map{$anntype}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 if($record{$key}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 foreach (@{$record{$key}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 #print "$key\t\t$_\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 $ann=add_annotation($ann,$key,$_,$anntype);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 # flatten designated attributes into a scalar value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 foreach my $field (keys %flatten_tags) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 if($record{$field}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 $record{$field} = join($flatten_tags{$field},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536 @{$record{$field}});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 # annotation that expects the array flattened out
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 $ann=read_reference($ann,'PUBMED',$record{PMID});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 if($record{ASSEMBLY}) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543 my @assembly=split(/,/,$record{ASSEMBLY});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 $ann=read_dblink($ann,'GenBank',\@assembly);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547 # replace fields w/ alternate if original does not exist
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 foreach my $fieldval (keys %alternate_map){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 if((! $record{$fieldval}) && ($record{$alternate_map{$fieldval}})){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 $record{$fieldval}=$record{$alternate_map{$fieldval}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 # create sequence object (i.e., let seq.factory create one)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 my $seq = $self->sequence_factory->create(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556 -verbose => $self->verbose(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 -accession_number => $record{LOCUSID},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 -desc => $record{OFFICIAL_GENE_NAME},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 -display_id => $record{OFFICIAL_SYMBOL},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560 -species => read_species($record{ORGANISM}),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 -annotation => $ann);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 # dump out object contents
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564 # show_obj([$seq]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566 return($seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 sub show_obj{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573 ################
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574 my ($seqlistref)=@_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575 my @list=@$seqlistref;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 my $out = Bio::SeqIO->new('-fh' => \*STDOUT, -format => 'genbank' );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 my ($ann,@values,$val);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 foreach my $seq(@list){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580 $out->write_seq($seq);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581 $ann=$seq->annotation;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582 foreach my $key ( $ann->get_all_annotation_keys() ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 @values = $ann->get_Annotations($key);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 foreach my $value ( @values ) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585 # value is an Bio::AnnotationI, and defines a "as_text" method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 $val=$value->as_text;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 print "Annotation ",$key,"\t\t",$val,"\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591 }#endsub
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 1;