annotate variant_effect_predictor/Bio/Perl.pm @ 0:2bc9b66ada89 draft default tip

Uploaded
author mahtabm
date Thu, 11 Apr 2013 06:29:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
1 # $Id: Perl.pm,v 1.16.2.1 2003/03/25 12:32:15 heikki Exp $
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
2 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
3 # BioPerl module for Bio::Perl
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
4 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
5 # Cared for by Ewan Birney <bioperl-l@bio.perl.org>
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
6 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
7 # Copyright Ewan Birney
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
8 #
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
10
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
11 # POD documentation - main docs before the code
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
12
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
13 =head1 NAME
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
14
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
15 Bio::Perl - Functional access to BioPerl for people who don't know objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
16
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
17 =head1 SYNOPSIS
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
18
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
19 use Bio::Perl;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
20
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
21 # will guess file format from extension
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
22 $seq_object = read_sequence($filename);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
23
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
24 # forces genbank format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
25 $seq_object = read_sequence($filename,'genbank');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
26
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
27 # reads an array of sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
28 @seq_object_array = read_all_sequences($filename,'fasta');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
29
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
30 # sequences are Bio::Seq objects, so the following methods work
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
31 # for more info see L<Bio::Seq>, or do 'perldoc Bio/Seq.pm'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
32
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
33 print "Sequence name is ",$seq_object->display_id,"\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
34 print "Sequence acc is ",$seq_object->accession_number,"\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
35 print "First 5 bases is ",$seq_object->subseq(1,5),"\n";
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
36
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
37 # get the whole sequence as a single string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
38
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
39 $sequence_as_a_string = $seq_object->seq();
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
40
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
41 # writing sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
42
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
43 write_sequence(">$filename",'genbank',$seq_object);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
44
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
45 write_sequence(">$filename",'genbank',@seq_object_array);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
46
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
47 # making a new sequence from just strings you have
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
48 # from something else
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
49
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
50 $seq_object = new_sequence("ATTGGTTTGGGGACCCAATTTGTGTGTTATATGTA",
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
51 "myname","AL12232");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
52
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
53
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
54 # getting a sequence from a database (assumes internet connection)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
55
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
56 $seq_object = get_sequence('swissprot',"ROA1_HUMAN");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
57
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
58 $seq_object = get_sequence('embl',"AI129902");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
59
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
60 $seq_object = get_sequence('genbank',"AI129902");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
61
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
62 # BLAST a sequence (assumes an internet connection)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
63
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
64 $blast_report = blast_sequence($seq_object);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
65
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
66 write_blast(">blast.out",$blast_report);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
67
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
68
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
69 =head1 DESCRIPTION
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
70
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
71 Easy first time access to BioPerl via functions
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
72
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
73 =head1 FEEDBACK
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
74
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
75 =head2 Mailing Lists
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
76
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
77 User feedback is an integral part of the evolution of this and other
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
78 Bioperl modules. Send your comments and suggestions preferably to one
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
79 of the Bioperl mailing lists. Your participation is much appreciated.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
80
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
81 bioperl-l@bio.perl.org
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
82
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
83 =head2 Reporting Bugs
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
84
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
85 Report bugs to the Bioperl bug tracking system to help us keep track
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
86 of the bugs and their resolution. Bug reports can be submitted via email
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
87 or the web:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
88
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
89 bioperl-bugs@bio.perl.org
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
90 http://bugzilla.bioperl.org/
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
91
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
92 =head1 AUTHOR - Ewan Birney
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
93
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
94 Email bioperl-l@bio.perl.org
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
95
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
96 Describe contact details here
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
97
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
98 =head1 APPENDIX
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
99
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
100 The rest of the documentation details each of the object methods.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
101 Internal methods are usually preceded with a _
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
102
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
103 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
104
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
105 #'
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
106 # Let the code begin...
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
107
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
108
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
109 package Bio::Perl;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
110 use vars qw(@ISA @EXPORT @EXPORT_OK $DBOKAY);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
111 use strict;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
112 use Carp;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
113 use Exporter;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
114
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
115 use Bio::SeqIO;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
116 use Bio::Seq;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
BEGIN {
    # The remote-database modules are optional. Attempt to load all of them
    # at compile time; if any require dies, eval leaves the error in $@ and
    # $DBOKAY records that database access is unavailable (0), otherwise 1.
    eval {
        require Bio::DB::EMBL;
        require Bio::DB::GenBank;
        require Bio::DB::SwissProt;
        require Bio::DB::RefSeq;
        require Bio::DB::GenPept;
    };
    $DBOKAY = $@ ? 0 : 1;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
131
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
132 @ISA = qw(Exporter);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
133
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
134 @EXPORT = qw(read_sequence read_all_sequences write_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
135 new_sequence get_sequence translate translate_as_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
136 reverse_complement revcom revcom_as_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
137 reverse_complement_as_string blast_sequence write_blast);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
138
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
139 @EXPORT_OK = @EXPORT;
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
140
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
141
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
142 =head2 read_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
143
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
144 Title : read_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
145 Usage : $seq = read_sequence('sequences.fa')
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
146 $seq = read_sequence($filename,'genbank');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
147
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
148 # pipes are fine
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
149 $seq = read_sequence("my_fetching_program $id |",'fasta');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
150
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
151 Function: Reads the top sequence from the file. If no format is given, it will
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
152 try to guess the format from the filename. If a format is given, it
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
153 forces that format. The filename can be any valid perl open() string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
154 - in particular, you can put in pipes
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
155
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
156 Returns : A Bio::Seq object. A quick synopsis:
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
157 $seq_object->display_id - name of the sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
158 $seq_object->seq - sequence as a string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
159
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
160 Args : Two strings, first the filename - any Perl open() string is ok
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
161 Second string is the format, which is optional
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
162
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
163 For more information on Seq objects see L<Bio::Seq>.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
164
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
165 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
166
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# read_sequence($filename [, $format])
#
# Reads the first sequence from $filename through Bio::SeqIO.
#   $filename - string passed to Bio::SeqIO as '-file'; required.
#   $format   - optional format name; when it is not defined no '-format'
#               argument is passed to Bio::SeqIO at all.
# Returns the value of the first $seqio->next_seq() call.
# Confesses (dies with a stack trace) when $filename is not defined.
sub read_sequence {
    my ($filename, $format) = @_;

    if ( !defined $filename ) {
        # $filename is undef on this path, so it is deliberately not
        # interpolated into the message (doing so only triggered an
        # "uninitialized value" warning and printed an empty "()").
        confess 'read_sequence(filename[,format]) - usage incorrect';
    }

    # Only mention the format when the caller actually supplied one.
    my @io_args = ( '-file' => $filename );
    push @io_args, ( '-format' => $format ) if defined $format;

    my $seqio = Bio::SeqIO->new(@io_args);

    # Assign to a scalar first so next_seq() is always called in scalar
    # context, whatever context read_sequence() itself was called in.
    my $seq = $seqio->next_seq();

    return $seq;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
186
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
187
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
188 =head2 read_all_sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
189
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
190 Title : read_all_sequences
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
191 Usage : @seq_object_array = read_all_sequences($filename);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
192 @seq_object_array = read_all_sequences($filename,'genbank');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
193
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
194 Function: Just as the function above, but reads all the sequences in the
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
195 file and loads them into an array.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
196
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
197 For very large files, you will run out of memory. When this
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
198 happens, you've got to use the SeqIO system directly (this is
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
199 not so hard! Don't worry about it!).
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
200
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
201 Returns : array of Bio::Seq objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
202
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
203 Args : two strings, first the filename (any open() string is ok)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
204 second the format (which is optional)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
205
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
206 See L<Bio::SeqIO> and L<Bio::Seq> for more information
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
207
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
208 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
209
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# read_all_sequences($filename [, $format])
#
# Reads every sequence from $filename through Bio::SeqIO and returns them
# as a list, in the order next_seq() produced them.
#   $filename - string passed to Bio::SeqIO as '-file'; required.
#   $format   - optional format name; when it is not defined no '-format'
#               argument is passed to Bio::SeqIO at all.
# Confesses (dies with a stack trace) when $filename is not defined.
sub read_all_sequences {
    my ($filename, $format) = @_;

    if ( !defined $filename ) {
        # $filename is undef on this path, so it is deliberately not
        # interpolated into the message (doing so only triggered an
        # "uninitialized value" warning and printed an empty "()").
        confess 'read_all_sequences(filename[,format]) - usage incorrect';
    }

    # Only mention the format when the caller actually supplied one.
    my @io_args = ( '-file' => $filename );
    push @io_args, ( '-format' => $format ) if defined $format;

    my $seqio = Bio::SeqIO->new(@io_args);

    # Collect sequences until next_seq() returns a false value.
    my @seq_array;
    while ( my $seq = $seqio->next_seq() ) {
        push @seq_array, $seq;
    }

    return @seq_array;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
233
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
234
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
235 =head2 write_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
236
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
237 Title : write_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
238 Usage : write_sequence(">new_file.gb",'genbank',$seq)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
239 write_sequence(">new_file.gb",'genbank',@array_of_sequence_objects)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
240
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
241 Function: writes sequences in the specified format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
242
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
243 Returns : true
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
244
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
245 Args : filename as a string, must provide an open() output file
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
246 format as a string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
247 one or more sequence objects
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
248
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
249
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
250 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
251
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# write_sequence($filename, $format, @sequence_objects)
#
# Writes one or more sequences to $filename in $format through Bio::SeqIO.
#   $filename         - output target. A string that does not already start
#                       (after optional whitespace) with '>' or '|' gets a
#                       '>' prepended, so a plain file name is opened for
#                       writing.
#   $format           - format name passed to Bio::SeqIO as '-format'.
#   @sequence_objects - objects that are Bio::SeqI, or plain strings longer
#                       than 50 characters, which are wrapped with
#                       new_sequence() after a single warning.
# Returns 1.
# Confesses when no sequence is given, when a short plain string is passed,
# or when an object is not a Bio::SeqI.
sub write_sequence {
    my ($filename, $format, @sequence_objects) = @_;

    if ( !@sequence_objects ) {
        confess("write_sequence(filename,format,sequence_object)");
    }

    my $warned  = 0;              # the long-string warning is issued once
    my $seqname = "sequence1";    # id for sequences built from raw strings

    # Catch users who haven't passed us a filename we can open for output.
    # The pipe must be matched as a literal character: the previous pattern
    # /^|/ was an alternation with an empty branch, matched every string,
    # and so the '>' was never added.
    if ( $filename !~ /^\s*[>|]/ ) {
        $filename = ">" . $filename;
    }

    my $seqio = Bio::SeqIO->new( '-file' => $filename, '-format' => $format );

    foreach my $seq (@sequence_objects) {
        my $seq_obj;

        if ( !ref $seq ) {
            if ( length($seq) > 50 ) {
                # Odds are this is a sequence as a string, and someone has
                # not figured out how to make objects. Warn him/her (once)
                # and then make a sequence object from this.
                if ( !$warned ) {
                    carp("WARNING: You have put in a long string into write_sequence.\nI suspect this means that this is the actual sequence\nIn the future try the\n new_sequence method of this module to make a new sequence object.\nDoing this for you here\n");
                    $warned = 1;
                }

                $seq_obj = new_sequence($seq, $seqname);
                # String auto-increment: "sequence1" becomes "sequence2".
                $seqname++;
            } else {
                confess("You have a non object [$seq] passed to write_sequence. It maybe that you want to use new_sequence to make this string into a sequence object?");
            }
        } else {
            if ( !$seq->isa("Bio::SeqI") ) {
                confess("object [$seq] is not a Bio::Seq object; can't write it out");
            }
            $seq_obj = $seq;
        }

        # finally... we get to write out the sequence!
        $seqio->write_seq($seq_obj);
    }

    return 1;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
299
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
300 =head2 new_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
301
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
302 Title : new_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
303 Usage : $seq_object = new_sequence($sequence_as_string, $name, $accession);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
304 Function: Makes a new Bio::Seq object from a sequence string, with an optional name and accession number
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
305 Example :
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
306 Returns : A Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
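307 Args : sequence as a string (required), name (optional, defaults to "no-name-for-sequence"), accession number (optional)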
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
308
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
309
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
310 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
311
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# new_sequence($seq [, $name [, $accession]])
#
# Builds a Bio::Seq object from a sequence string.
#   $seq       - the sequence as a string; required.
#   $name      - used as the object's id; any false value is replaced by
#                "no-name-for-sequence".
#   $accession - when true, stored through accession_number().
# Returns the new Bio::Seq object.
# Confesses when $seq is not defined.
sub new_sequence {
    my ($seq, $name, $accession) = @_;

    confess("new_sequence(sequence_as_string) usage") unless defined $seq;

    # Fall back to a placeholder id when no usable name was supplied.
    $name = "no-name-for-sequence" unless $name;

    my $new_seq = Bio::Seq->new( -seq => $seq, -id => $name );

    # The accession number is optional and only set when one was given.
    if ($accession) {
        $new_seq->accession_number($accession);
    }

    return $new_seq;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
327
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
328 =head2 blast_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
329
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
330 Title : blast_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
331 Usage : $blast_result = blast_sequence($seq)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
332 $blast_result = blast_sequence('MFVEGGTFASEDDDSASAEDE');
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
333
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
334 Function: If the computer has Internet accessibility, blasts
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
335 the sequence using the NCBI BLAST server against nrdb.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
336
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
337 It chooses the flavour of BLAST on the basis of the sequence.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
338
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
339 This function uses Bio::Tools::Run::RemoteBlast, which itself
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
340 uses Bio::SearchIO - as soon as you want to do more, check out
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
341 these modules
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
342 Returns : Bio::Search::Result::GenericResult.pm
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
343
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
344 Args : Either a string of protein letters or nucleotides, or a
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
345 Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
346
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
347 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
348
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# blast_sequence($seq [, $verbose])
#
# Submits a sequence to a remote BLAST service through
# Bio::Tools::Run::RemoteBlast and polls until a result can be retrieved.
#   $seq     - a Bio::PrimarySeqI object, or a plain string that is wrapped
#              in a Bio::Seq with the id 'blast-sequence-temp-id'.
#   $verbose - optional; defaults to 1. When true, progress is printed to
#              STDERR (a submission line, one '.' per unfinished poll, and
#              a final newline).
# Returns the first result of the retrieved report, or undef when every
# request id was dropped without a report being retrieved.
# Croaks when $seq is an object that is not a Bio::PrimarySeqI.
sub blast_sequence {
    # Take both arguments from @_. The earlier "= shift" assigned only
    # $seq, so a caller-supplied $verbose was silently ignored.
    my ($seq, $verbose) = @_;

    if ( !defined $verbose ) {
        $verbose = 1;
    }

    if ( !ref $seq ) {
        $seq = Bio::Seq->new( -seq => $seq, -id => 'blast-sequence-temp-id' );
    } elsif ( !$seq->isa('Bio::PrimarySeqI') ) {
        croak("[$seq] is an object, but not a Bio::Seq object, cannot be blasted");
    }

    # Loaded lazily so the module is only needed when BLAST is used.
    require Bio::Tools::Run::RemoteBlast;

    # Pick the BLAST program from the sequence type, as the documentation
    # promises: protein sequences use blastp, everything else blastn.
    my $prog  = ( $seq->alphabet eq 'protein' ) ? 'blastp' : 'blastn';
    my $e_val = '1e-10';

    my @params = (
        '-prog'       => $prog,
        '-expect'     => $e_val,
        '-readmethod' => 'SearchIO',
    );

    my $factory = Bio::Tools::Run::RemoteBlast->new(@params);

    my $r = $factory->submit_blast($seq);
    if ($verbose) {
        print STDERR "Submitted Blast for [" . $seq->id . "] ";
    }
    # Give the server a moment before the first poll.
    sleep 5;

    my $result;

    # Poll for as long as the factory still holds request ids.
    LOOP:
    while ( my @rids = $factory->each_rid ) {
        foreach my $rid (@rids) {
            my $rc = $factory->retrieve_blast($rid);
            if ( !ref($rc) ) {
                # A non-reference return means no report yet; a negative
                # value is treated as an error and the request is dropped.
                if ( $rc < 0 ) {
                    $factory->remove_rid($rid);
                }
                if ($verbose) {
                    print STDERR ".";
                }
                sleep 10;
            } else {
                # A report object came back: keep its first result and stop.
                $result = $rc->next_result();
                $factory->remove_rid($rid);
                last LOOP;
            }
        }
    }

    if ($verbose) {
        print STDERR "\n";
    }
    return $result;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
406
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
407 =head2 write_blast
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
408
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
409 Title : write_blast
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
410 Usage : write_blast($filename,$blast_report);
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
411
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
412 Function: Writes a BLAST result object (or more formally
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
413 a SearchIO result object) out to a filename
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
414 in BLAST-like format
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
415
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
416 Returns : none
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
417
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
418 Args : filename as a string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
419 Bio::SearchIO::Results object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
420
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
421 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
422
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# write_blast($filename, $blast)
#
# Writes a SearchIO result object to $filename in BLAST-like format.
#
#   $filename - target file name; may already carry a leading ">" (or ">>")
#               or a leading "|", in which case it is passed on unchanged
#   $blast    - the result object handed to Bio::SearchIO->write_result
#
# Returns whatever write_result returns (the POD documents no return value).
sub write_blast {
    my ($filename,$blast) = @_;

    # Prepend ">" unless the caller already supplied a leading ">" or "|".
    # The pipe has to be matched literally: an unescaped "|" in the pattern
    # is an alternation with an empty branch, which matches every string and
    # therefore prevented the ">" from ever being added.
    # NOTE(review): presumably Bio::SearchIO treats -file like a two-argument
    # open() spec, so the ">" is what selects write mode - confirm.
    if( $filename !~ /^[>|]/ ) {
        $filename = ">".$filename;
    }

    my $output = Bio::SearchIO->new( -output_format => 'blast', -file => $filename);

    return $output->write_result($blast);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
435
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
436 =head2 get_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
437
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
438 Title : get_sequence
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
439 Usage : $seq_object = get_sequence('swiss',"ROA1_HUMAN");
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
440
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
441 Function: If the computer has Internet accessibility, gets
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
442 the sequence from Internet accessible databases. Currently
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
443 this supports Swissprot, EMBL, GenBank, GenPept and RefSeq.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
444
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
445 Swissprot and EMBL are more robust than GenBank fetching.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
446
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
447 If the user is trying to retrieve a RefSeq entry from
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
448 GenBank/EMBL, the query is silently redirected.
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
449
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
450 Returns : A Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
451
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
452 Args : database type - one of swiss, embl, genbank, genpept or refseq
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
453 identifier or accession number
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
454
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
455 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
456
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# File-scoped cache of database handles. Each one is created the first time
# get_sequence needs it and is reused on later calls.
my $genbank_db = undef;
my $genpept_db = undef;
my $embl_db = undef;
my $swiss_db = undef;
my $refseq_db = undef;

# get_sequence($db_type, $identifier)
#
# Fetches a sequence from a remote database.
#
#   $db_type    - matched case-insensitively as a substring against
#                 genbank, genpept, swiss, embl and refseq
#   $identifier - identifier or accession number
#
# Returns the value of the database handle's get_Seq_by_acc / get_Seq_by_id.
# Dies (via confess) when $DBOKAY is false or when no database matches.
sub get_sequence{
    my ($db_type,$identifier) = @_;

    # $DBOKAY is set outside this block; confess() does not return, so no
    # explicit return is needed after it.
    if( ! $DBOKAY ) {
        confess("Your system does not have IO::String installed so the DB retrieval method is not available");
    }
    $db_type = lc($db_type);

    my $db;

    # The checks below are deliberately independent (not elsif): a later
    # match overrides an earlier one, exactly as before.
    if( $db_type =~ /genbank/ ) {
        $genbank_db = Bio::DB::GenBank->new() unless defined $genbank_db;
        $db = $genbank_db;
    }
    if( $db_type =~ /genpept/ ) {
        $genpept_db = Bio::DB::GenPept->new() unless defined $genpept_db;
        $db = $genpept_db;
    }

    if( $db_type =~ /swiss/ ) {
        $swiss_db = Bio::DB::SwissProt->new() unless defined $swiss_db;
        $db = $swiss_db;
    }

    if( $db_type =~ /embl/ ) {
        $embl_db = Bio::DB::EMBL->new() unless defined $embl_db;
        $db = $embl_db;
    }

    # Identifiers of the form "N<non-space>..._" are redirected to RefSeq for
    # every database type except swiss.
    if( $db_type =~ /refseq/ or ($db_type !~ /swiss/ and
                                 $identifier =~ /^\s*N\S+_/)) {
        $refseq_db = Bio::DB::RefSeq->new() unless defined $refseq_db;
        $db = $refseq_db;
    }

    # Without this guard an unrecognised type fell through to a method call
    # on undef and died with an unhelpful "Can't call method" error.
    if( !defined $db ) {
        confess("Unknown database type [$db_type]; expected one of genbank, genpept, swiss, embl or refseq");
    }

    my $seq;

    # Word characters ending in a digit are looked up as an accession;
    # everything else is looked up as an id.
    if( $identifier =~ /^\w+\d+$/ ) {
        $seq = $db->get_Seq_by_acc($identifier);
    } else {
        $seq = $db->get_Seq_by_id($identifier);
    }

    return $seq;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
518
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
519
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
520 =head2 translate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
521
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
522 Title : translate
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
523 Usage : $seqobj = translate($seq_or_string_scalar)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
524
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
525 Function: translates a DNA sequence object OR just a plain
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
526 string of DNA to amino acids
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
527 Returns : A Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
528
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
529 Args : Either a sequence object or a string of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
530 just DNA sequence characters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
531
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
532 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
533
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# translate($seq_or_string_scalar)
#
# Translates a sequence object, or a plain string of DNA, by calling
# ->translate on it.
#
#   $scalar - either an object that isa Bio::PrimarySeqI, or a string of
#             which at least 85% of the characters are A, T, G, C or N
#             (either case)
#
# Returns whatever the object's translate() method returns.
# Dies (via confess) on a reference that is not a Bio::PrimarySeqI, or on a
# string that fails the 85% check.
sub translate {
    my ($scalar) = shift;

    my $obj;

    if( ref $scalar ) {
        # eval shields the ->isa call: on an unblessed reference it would die
        # with a Perl-level error instead of reaching the confess below.
        local $@;
        my $is_seq = eval { $scalar->isa("Bio::PrimarySeqI") };
        if( !$is_seq ) {
            confess("Expecting a sequence object not a $scalar");
        }
        $obj = $scalar;
    } else {

        # check this looks vaguely like DNA
        # tr with an empty replacement list only counts. Lowercase "n" is
        # part of the search list so that it is counted like uppercase "N".
        my $n = ( $scalar =~ tr/ATGCNatgcn// );

        if( $n < length($scalar) * 0.85 ) {
            confess("Sequence [$scalar] is less than 85% ATGCN, which doesn't look very DNA to me");
        }

        $obj = Bio::PrimarySeq->new(-id => 'internalbioperlseq',-seq => $scalar);
    }

    return $obj->translate();
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
561
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
562
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
563 =head2 translate_as_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
564
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
565 Title : translate_as_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
566 Usage : $seqstring = translate_as_string($seq_or_string_scalar)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
567
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
568 Function: translates a DNA sequence object OR just a plain
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
569 string of DNA to amino acids
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
570 Returns : A string of just amino acids
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
571
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
572 Args : Either a sequence object or a string of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
573 just DNA sequence characters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
574
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
575 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
576
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# translate_as_string($seq_or_string_scalar)
#
# Runs the argument through Bio::Perl::translate and returns the ->seq of
# the resulting object.
sub translate_as_string {
    my $input = shift;

    return Bio::Perl::translate($input)->seq;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
584
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
585
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
586 =head2 reverse_complement
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
587
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
588 Title : reverse_complement
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
589 Usage : $seqobj = reverse_complement($seq_or_string_scalar)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
590
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
591 Function: reverse complements a string or sequence argument
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
592 producing a Bio::Seq - if you want a string, you
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
593 can use reverse_complement_as_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
594 Returns : A Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
595
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
596 Args : Either a sequence object or a string of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
597 just DNA sequence characters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
598
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
599 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
600
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# reverse_complement($seq_or_string_scalar)
#
# Reverse complements a sequence object, or a plain string of DNA, by
# calling ->revcom on it.
#
#   $scalar - either an object that isa Bio::PrimarySeqI, or a string of
#             which at least 85% of the characters are A, T, G, C or N
#             (either case)
#
# Returns whatever the object's revcom() method returns.
# Dies (via confess) on a reference that is not a Bio::PrimarySeqI, or on a
# string that fails the 85% check.
sub reverse_complement {
    my ($scalar) = shift;

    my $obj;

    if( ref $scalar ) {
        # eval shields the ->isa call: on an unblessed reference it would die
        # with a Perl-level error instead of reaching the confess below.
        local $@;
        my $is_seq = eval { $scalar->isa("Bio::PrimarySeqI") };
        if( !$is_seq ) {
            confess("Expecting a sequence object not a $scalar");
        }
        $obj = $scalar;
    } else {

        # check this looks vaguely like DNA
        # tr with an empty replacement list only counts. Lowercase "n" is
        # part of the search list so that it is counted like uppercase "N".
        my $n = ( $scalar =~ tr/ATGCNatgcn// );

        if( $n < length($scalar) * 0.85 ) {
            confess("Sequence [$scalar] is less than 85% ATGCN, which doesn't look very DNA to me");
        }

        $obj = Bio::PrimarySeq->new(-id => 'internalbioperlseq',-seq => $scalar);
    }

    return $obj->revcom();
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
628
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
629 =head2 revcom
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
630
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
631 Title : revcom
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
632 Usage : $seqobj = revcom($seq_or_string_scalar)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
633
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
634 Function: reverse complements a string or sequence argument
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
635 producing a Bio::Seq - if you want a string, you
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
636 can use reverse_complement_as_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
637
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
638 This is an alias for reverse_complement
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
639 Returns : A Bio::Seq object
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
640
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
641 Args : Either a sequence object or a string of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
642 just DNA sequence characters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
643
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
644 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
645
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# revcom(@args)
#
# Alias for reverse_complement: forwards every argument unchanged and
# returns its result.
sub revcom {
    my @args = @_;

    return Bio::Perl::reverse_complement(@args);
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
649
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
650
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
651 =head2 reverse_complement_as_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
652
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
653 Title : reverse_complement_as_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
654 Usage : $string = reverse_complement_as_string($seq_or_string_scalar)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
655
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
656 Function: reverse complements a string or sequence argument
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
657 producing a string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
658 Returns : A string of DNA letters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
659
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
660 Args : Either a sequence object or a string of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
661 just DNA sequence characters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
662
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
663 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
664
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# reverse_complement_as_string($seq_or_string_scalar)
#
# Runs the argument through Bio::Perl::reverse_complement and returns the
# ->seq of the resulting object.
sub reverse_complement_as_string {
    my $input = shift;

    return Bio::Perl::reverse_complement($input)->seq;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
672
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
673
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
674 =head2 revcom_as_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
675
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
676 Title : revcom_as_string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
677 Usage : $string = revcom_as_string($seq_or_string_scalar)
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
678
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
679 Function: reverse complements a string or sequence argument
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
680 producing a string
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
681 Returns : A string of DNA letters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
682
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
683 Args : Either a sequence object or a string of
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
684 just DNA sequence characters
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
685
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
686 =cut
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
687
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
# revcom_as_string($seq_or_string_scalar)
#
# Short-named variant of reverse_complement_as_string: reverse complements
# the argument via Bio::Perl::reverse_complement and returns the ->seq of
# the resulting object.
sub revcom_as_string {
    my $input = shift;

    return Bio::Perl::reverse_complement($input)->seq;
}
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
695
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
696
2bc9b66ada89 Uploaded
mahtabm
parents:
diff changeset
697 1;