annotate variant_effect_predictor/Bio/Perl.pm @ 1:d6778b5d8382 draft default tip

Deleted selected files
author willmclaren
date Fri, 03 Aug 2012 10:05:43 -0400
parents 21066c0abaf5
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
1 # $Id: Perl.pm,v 1.16.2.1 2003/03/25 12:32:15 heikki Exp $
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
2 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
3 # BioPerl module for Bio::Perl
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
4 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
5 # Cared for by Ewan Birney <bioperl-l@bio.perl.org>
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
6 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
7 # Copyright Ewan Birney
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
8 #
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
9 # You may distribute this module under the same terms as perl itself
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
10
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
11 # POD documentation - main docs before the code
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
12
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
13 =head1 NAME
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
14
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
15 Bio::Perl - Functional access to BioPerl for people who don't know objects
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
16
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
17 =head1 SYNOPSIS
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
18
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
19 use Bio::Perl;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
20
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
21 # will guess file format from extension
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
22 $seq_object = read_sequence($filename);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
23
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
24 # forces genbank format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
25 $seq_object = read_sequence($filename,'genbank');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
26
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
27 # reads an array of sequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
28 @seq_object_array = read_all_sequences($filename,'fasta');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
29
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
30 # sequences are Bio::Seq objects, so the following methods work
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
31 # for more info see L<Bio::Seq>, or do 'perldoc Bio/Seq.pm'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
32
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
33 print "Sequence name is ",$seq_object->display_id,"\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
34 print "Sequence acc is ",$seq_object->accession_number,"\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
35 print "First 5 bases is ",$seq_object->subseq(1,5),"\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
36
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
37 # get the whole sequence as a single string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
38
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
39 $sequence_as_a_string = $seq_object->seq();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
40
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
41 # writing sequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
42
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
43 write_sequence(">$filename",'genbank',$seq_object);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
44
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
45 write_sequence(">$filename",'genbank',@seq_object_array);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
46
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
47 # making a new sequence from just strings you have
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
48 # from something else
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
49
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
50 $seq_object = new_sequence("ATTGGTTTGGGGACCCAATTTGTGTGTTATATGTA",
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
51 "myname","AL12232");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
52
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
53
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
54 # getting a sequence from a database (assumes internet connection)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
55
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
56 $seq_object = get_sequence('swissprot',"ROA1_HUMAN");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
57
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
58 $seq_object = get_sequence('embl',"AI129902");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
59
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
60 $seq_object = get_sequence('genbank',"AI129902");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
61
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
62 # BLAST a sequence (assumes an internet connection)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
63
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
64 $blast_report = blast_sequence($seq_object);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
65
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
66 write_blast(">blast.out",$blast_report);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
67
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
68
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
69 =head1 DESCRIPTION
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
70
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
71 Easy first time access to BioPerl via functions
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
72
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
73 =head1 FEEDBACK
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
74
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
75 =head2 Mailing Lists
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
76
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
77 User feedback is an integral part of the evolution of this and other
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
78 Bioperl modules. Send your comments and suggestions preferably to one
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
79 of the Bioperl mailing lists. Your participation is much appreciated.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
80
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
81 bioperl-l@bio.perl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
82
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
83 =head2 Reporting Bugs
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
84
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
85 Report bugs to the Bioperl bug tracking system to help us keep track
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
86 of the bugs and their resolution. Bug reports can be submitted via email
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
87 or the web:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
88
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
89 bioperl-bugs@bio.perl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
90 http://bugzilla.bioperl.org/
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
91
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
92 =head1 AUTHOR - Ewan Birney
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
93
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
94 Email bioperl-l@bio.perl.org
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
95
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
96 Describe contact details here
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
97
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
98 =head1 APPENDIX
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
99
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
100 The rest of the documentation details each of the object methods.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
101 Internal methods are usually preceded with a _
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
102
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
103 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
104
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
105 #'
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
106 # Let the code begin...
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
107
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
108
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
109 package Bio::Perl;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
110 use vars qw(@ISA @EXPORT @EXPORT_OK $DBOKAY);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
111 use strict;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
112 use Carp;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
113 use Exporter;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
114
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
115 use Bio::SeqIO;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
116 use Bio::Seq;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
BEGIN {
    # The remote-database helpers are optional.  Try to load all of them at
    # compile time; if any single require fails, record that database access
    # is unavailable instead of letting the failure abort loading this module.
    my $all_loaded = eval {
        require Bio::DB::EMBL;
        require Bio::DB::GenBank;
        require Bio::DB::SwissProt;
        require Bio::DB::RefSeq;
        require Bio::DB::GenPept;
        1;
    };

    # $DBOKAY is 1 only when every module above loaded, 0 otherwise.
    $DBOKAY = $all_loaded ? 1 : 0;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
131
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
132 @ISA = qw(Exporter);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
133
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
134 @EXPORT = qw(read_sequence read_all_sequences write_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
135 new_sequence get_sequence translate translate_as_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
136 reverse_complement revcom revcom_as_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
137 reverse_complement_as_string blast_sequence write_blast);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
138
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
139 @EXPORT_OK = @EXPORT;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
140
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
141
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
142 =head2 read_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
143
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
144 Title : read_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
145 Usage : $seq = read_sequence('sequences.fa')
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
146 $seq = read_sequence($filename,'genbank');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
147
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
148 # pipes are fine
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
149 $seq = read_sequence("my_fetching_program $id |",'fasta');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
150
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
151 Function: Reads the top sequence from the file. If no format is given, it will
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
152 try to guess the format from the filename. If a format is given, it
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
153 forces that format. The filename can be any valid perl open() string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
154 - in particular, you can put in pipes
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
155
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
156 Returns : A Bio::Seq object. A quick synopsis:
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
157 $seq_object->display_id - name of the sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
158 $seq_object->seq - sequence as a string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
159
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
160 Args : Two strings, first the filename - any Perl open() string is ok
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
161 Second string is the format, which is optional
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
162
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
163 For more information on Seq objects see L<Bio::Seq>.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
164
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
165 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
166
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# read_sequence($filename [, $format])
#
# Opens $filename with Bio::SeqIO and returns the first sequence it yields.
#
#   $filename - string handed to Bio::SeqIO as '-file' (required)
#   $format   - optional format name; when given it is passed as '-format',
#               otherwise no '-format' argument is supplied at all
#
# Returns whatever the first call to next_seq() returns.
# Dies (confess, with stack trace) when $filename is undefined.
sub read_sequence {
    my ($filename, $format) = @_;

    if ( !defined $filename ) {
        # $filename is undef on this path, so it is deliberately not
        # interpolated: doing so produced the same text plus an
        # "uninitialized value" warning under -w.
        confess "read_sequence() - usage incorrect";
    }

    # Only mention '-format' when the caller actually supplied one.
    my @io_args = ( '-file' => $filename );
    if ( defined $format ) {
        push @io_args, ( '-format' => $format );
    }

    my $seqio = Bio::SeqIO->new(@io_args);

    # Keep next_seq() in scalar context so the caller always gets exactly
    # one value back, as before.
    my $seq = $seqio->next_seq();

    return $seq;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
186
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
187
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
188 =head2 read_all_sequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
189
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
190 Title : read_all_sequences
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
191 Usage : @seq_object_array = read_all_sequences($filename);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
192 @seq_object_array = read_all_sequences($filename,'genbank');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
193
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
194 Function: Just as the function above, but reads all the sequences in the
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
195 file and loads them into an array.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
196
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
197 For very large files, you will run out of memory. When this
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
198 happens, you've got to use the SeqIO system directly (this is
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
199 not so hard! Don't worry about it!).
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
200
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
201 Returns : array of Bio::Seq objects
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
202
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
203 Args : two strings, first the filename (any open() string is ok)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
204 second the format (which is optional)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
205
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
206 See L<Bio::SeqIO> and L<Bio::Seq> for more information
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
207
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
208 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
209
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# read_all_sequences($filename [, $format])
#
# Opens $filename with Bio::SeqIO and collects every sequence it yields.
#
#   $filename - string handed to Bio::SeqIO as '-file' (required)
#   $format   - optional format name; when given it is passed as '-format',
#               otherwise no '-format' argument is supplied at all
#
# Returns the list of values produced by next_seq(), in reading order.
# Reading stops at the first false value returned by next_seq().
# Dies (confess, with stack trace) when $filename is undefined.
sub read_all_sequences {
    my ($filename, $format) = @_;

    if ( !defined $filename ) {
        # $filename is undef on this path, so it is deliberately not
        # interpolated: doing so produced the same text plus an
        # "uninitialized value" warning under -w.
        confess "read_all_sequences() - usage incorrect";
    }

    # Only mention '-format' when the caller actually supplied one.
    my @io_args = ( '-file' => $filename );
    if ( defined $format ) {
        push @io_args, ( '-format' => $format );
    }

    my $seqio = Bio::SeqIO->new(@io_args);

    # Everything is held in memory at once; callers with very large inputs
    # should iterate over a Bio::SeqIO stream themselves.
    my @seq_array;
    while ( my $seq = $seqio->next_seq() ) {
        push @seq_array, $seq;
    }

    return @seq_array;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
233
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
234
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
235 =head2 write_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
236
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
237 Title : write_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
238 Usage : write_sequence(">new_file.gb",'genbank',$seq)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
239 write_sequence(">new_file.gb",'genbank',@array_of_sequence_objects)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
240
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
241 Function: writes sequences in the specified format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
242
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
243 Returns : true
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
244
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
245 Args : filename as a string, must provide an open() output file
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
246 format as a string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
247 one or more sequence objects
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
248
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
249
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
250 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
251
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# write_sequence($filename, $format, @sequence_objects)
#
# Writes one or more sequences to $filename in $format through Bio::SeqIO.
#
#   $filename         - output target.  A name that does not already start
#                       with '>' (write/append) or '|' (pipe) gets '>'
#                       prepended so that it is opened for writing.
#   $format           - format name passed to Bio::SeqIO as '-format'
#   @sequence_objects - objects implementing Bio::SeqI.  A plain string
#                       longer than 50 characters is assumed to be raw
#                       sequence data: it is wrapped via new_sequence() under
#                       the generated names sequence1, sequence2, ... and a
#                       single warning is issued per call.
#
# Returns 1.
# Dies (confess) when no sequences are given, when a short non-reference
# value is passed, or when a reference is not a Bio::SeqI.
sub write_sequence {
    my ($filename, $format, @sequence_objects) = @_;

    if ( scalar(@sequence_objects) == 0 ) {
        confess("write_sequence(filename,format,sequence_object)");
    }

    my $warned       = 0;    # the "long string" warning is issued only once
    my $string_count = 0;    # number of raw strings wrapped so far

    # Catch users who haven't passed us a filename we can open for output.
    # The pipe character must be escaped: an unescaped '|' is an alternation
    # with an empty branch, which matches every string and therefore
    # disabled this whole fix-up.  An undefined filename is left untouched.
    if ( defined $filename && $filename !~ /^\>/ && $filename !~ /^\|/ ) {
        $filename = ">" . $filename;
    }

    my $seqio = Bio::SeqIO->new( '-file' => $filename, '-format' => $format );

    foreach my $seq (@sequence_objects) {
        my $seq_obj;

        if ( !ref $seq ) {
            if ( length($seq) > 50 ) {
                # Odds are this is a sequence as a string, and someone has
                # not figured out how to make objects.  Warn once and then
                # make a sequence object from it.
                if ( !$warned ) {
                    carp("WARNING: You have put in a long string into write_sequence.\nI suspect this means that this is the actual sequence\nIn the future try the\n new_sequence method of this module to make a new sequence object.\nDoing this for you here\n");
                    $warned = 1;
                }

                # Use a numeric counter rather than '++' on the name itself:
                # magic string increment turns "sequence9" into "sequencf0".
                $string_count++;
                $seq_obj = new_sequence( $seq, "sequence" . $string_count );
            }
            else {
                confess("You have a non object [$seq] passed to write_sequence. It maybe that you want to use new_sequence to make this string into a sequence object?");
            }
        }
        else {
            if ( !$seq->isa("Bio::SeqI") ) {
                confess("object [$seq] is not a Bio::Seq object; can't write it out");
            }
            $seq_obj = $seq;
        }

        # finally... we get to write out the sequence!
        $seqio->write_seq($seq_obj);
    }

    return 1;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
299
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
300 =head2 new_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
301
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
302 Title : new_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
303 Usage : $seq_object = new_sequence($sequence_as_string, $name, $accession);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
304 Function: Makes a new Bio::Seq object from a sequence string, with an optional name and accession number
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
305 Example :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
306 Returns : A Bio::Seq object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
307 Args : sequence as a string, name (optional), accession number (optional)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
308
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
309
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
310 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
311
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# new_sequence($seq [, $name [, $accession]])
#
# Builds a Bio::Seq object from a plain sequence string.
#
#   $seq       - the sequence as a string (required)
#   $name      - value used as the object's '-id'; defaults to
#                "no-name-for-sequence" when undefined or empty
#   $accession - optional; when defined and non-empty it is set through
#                accession_number()
#
# Returns the new Bio::Seq object.
# Dies (confess, with stack trace) when $seq is undefined.
sub new_sequence {
    my ($seq, $name, $accession) = @_;

    if ( !defined $seq ) {
        confess("new_sequence(sequence_as_string) usage");
    }

    # Test for "missing" with defined/length rather than truthiness, so that
    # a name or accession of "0" is kept instead of being silently dropped.
    if ( !( defined $name && length $name ) ) {
        $name = "no-name-for-sequence";
    }

    my $seq_object = Bio::Seq->new( -seq => $seq, -id => $name );

    if ( defined $accession && length $accession ) {
        $seq_object->accession_number($accession);
    }

    return $seq_object;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
327
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
328 =head2 blast_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
329
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
330 Title : blast_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
331 Usage : $blast_result = blast_sequence($seq)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
332 $blast_result = blast_sequence('MFVEGGTFASEDDDSASAEDE');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
333
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
334 Function: If the computer has Internet accessibility, blasts
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
335 the sequence using the NCBI BLAST server against nrdb.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
336
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
337 It chooses the flavour of BLAST on the basis of the sequence.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
338
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
339 This function uses Bio::Tools::Run::RemoteBlast, which itself
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
340 uses Bio::SearchIO - as soon as you want to do more, check out
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
341 these modules
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
342 Returns : Bio::Search::Result::GenericResult.pm
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
343
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
344 Args : Either a string of protein letters or nucleotides, or a
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
345 Bio::Seq object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
346
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
347 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
348
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
349 sub blast_sequence {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
350 my ($seq,$verbose) = shift;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
351
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
352 if( !defined $verbose ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
353 $verbose = 1;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
354 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
355
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
356 if( !ref $seq ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
357 $seq = Bio::Seq->new( -seq => $seq, -id => 'blast-sequence-temp-id');
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
358 } elsif ( !$seq->isa('Bio::PrimarySeqI') ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
359 croak("[$seq] is an object, but not a Bio::Seq object, cannot be blasted");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
360 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
361
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
362 require Bio::Tools::Run::RemoteBlast;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
363
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
364 my $prog = 'blastp';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
365 my $e_val= '1e-10';
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
366
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
367 my @params = ( '-prog' => $prog,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
368 '-expect' => $e_val,
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
369 '-readmethod' => 'SearchIO' );
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
370
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
371 my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
372
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
373 my $r = $factory->submit_blast($seq);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
374 if( $verbose ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
375 print STDERR "Submitted Blast for [".$seq->id."] ";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
376 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
377 sleep 5;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
378
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
379 my $result;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
380
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
381 LOOP :
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
382 while( my @rids = $factory->each_rid) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
383 foreach my $rid ( @rids ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
384 my $rc = $factory->retrieve_blast($rid);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
385 if( !ref($rc) ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
386 if( $rc < 0 ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
387 $factory->remove_rid($rid);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
388 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
389 if( $verbose ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
390 print STDERR ".";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
391 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
392 sleep 10;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
393 } else {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
394 $result = $rc->next_result();
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
395 $factory->remove_rid($rid);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
396 last LOOP;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
397 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
398 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
399 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
400
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
401 if( $verbose ) {
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
402 print STDERR "\n";
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
403 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
404 return $result;
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
405 }
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
406
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
407 =head2 write_blast
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
408
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
409 Title : write_blast
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
410 Usage : write_blast($filename,$blast_report);
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
411
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
412 Function: Writes a BLAST result object (or more formally
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
413 a SearchIO result object) out to a filename
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
414 in BLAST-like format
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
415
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
416 Returns : none
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
417
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
418 Args : filename as a string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
419 Bio::SearchIO::Results object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
420
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
421 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
422
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub write_blast {
    # Write a SearchIO result object to $filename in BLAST-like format.
    #
    # Args    : $filename - destination; a value that already starts with
    #                       '>' (write/append marker) or '|' (pipe) is
    #                       used as given, anything else gets '>' prepended
    #           $blast    - result object handed to write_result()
    # Returns : whatever write_result() returns (callers should not rely
    #           on it; the documented return is "none")
    my ($filename,$blast) = @_;

    # The pipe must be escaped: an unescaped /^|/ is an empty alternation
    # that matches every string, which meant '>' was never prepended and
    # plain filenames were never put into write mode.
    # NOTE(review): relies on BioPerl's -file convention that a leading
    # '>' selects output mode - confirm against Bio::Root::IO.
    if( $filename !~ /^[>|]/ ) {
        $filename = ">".$filename;
    }

    my $output = Bio::SearchIO->new( -output_format => 'blast', -file => $filename);

    $output->write_result($blast);

}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
435
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
436 =head2 get_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
437
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
438 Title : get_sequence
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
439 Usage : $seq_object = get_sequence('swiss',"ROA1_HUMAN");
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
440
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
441 Function: If the computer has Internet accessibility, gets
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
442 the sequence from Internet accessible databases. Currently
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
443 this supports Swissprot, EMBL, GenBank and RefSeq.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
444
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
445 Swissprot and EMBL are more robust than GenBank fetching.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
446
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
447 If the user is trying to retrieve a RefSeq entry from
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
448 GenBank/EMBL, the query is silently redirected.
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
449
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
450 Returns : A Bio::Seq object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
451
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
452 Args : database type - one of swiss, embl, genbank or refseq
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
453 identifier or accession number
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
454
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
455 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
456
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
# File-scoped cache of remote database handles. Each one is created the
# first time get_sequence() needs it and reused on later calls.
my $genbank_db = undef;
my $genpept_db = undef;
my $embl_db = undef;
my $swiss_db = undef;
my $refseq_db = undef;

sub get_sequence{
    # Fetch one sequence from a remote database.
    #
    # Args    : $db_type    - matched case-insensitively against genbank,
    #                         genpept, swiss, embl and refseq
    #           $identifier - identifier or accession number
    # Returns : whatever the handle's get_Seq_by_acc / get_Seq_by_id returns
    # Throws  : confess if $DBOKAY is false; croak if $db_type names no
    #           supported database
    my ($db_type,$identifier) = @_;
    if( ! $DBOKAY ) {
        # confess dies, so nothing after it in this branch would run.
        confess("Your system does not have IO::String installed so the DB retrieval method is not available");
    }
    $db_type = lc($db_type);

    my $db;

    if( $db_type =~ /genbank/ ) {
        if( !defined $genbank_db ) {
            $genbank_db = Bio::DB::GenBank->new();
        }
        $db = $genbank_db;
    }
    if( $db_type =~ /genpept/ ) {
        if( !defined $genpept_db ) {
            $genpept_db = Bio::DB::GenPept->new();
        }
        $db = $genpept_db;
    }

    if( $db_type =~ /swiss/ ) {
        if( !defined $swiss_db ) {
            $swiss_db = Bio::DB::SwissProt->new();
        }
        $db = $swiss_db;
    }

    if( $db_type =~ /embl/ ) {
        if( !defined $embl_db ) {
            $embl_db = Bio::DB::EMBL->new();
        }
        $db = $embl_db;
    }

    # Checked last so that it overrides the choices above: an identifier
    # of the form N<non-space chars>_ is redirected to RefSeq for every
    # database type except swiss.
    if( $db_type =~ /refseq/ or ($db_type !~ /swiss/ and
                                 $identifier =~ /^\s*N\S+_/)) {
        if( !defined $refseq_db ) {
            $refseq_db = Bio::DB::RefSeq->new();
        }
        $db = $refseq_db;
    }

    # Without this guard an unrecognised database type fell through to a
    # method call on undef and died with an unhelpful message.
    if( !defined $db ) {
        croak("Unknown database type [$db_type], expected one of genbank, genpept, swiss, embl or refseq");
    }

    my $seq;

    # Word characters ending in a digit are looked up as an accession;
    # everything else is looked up as an id.
    if( $identifier =~ /^\w+\d+$/ ) {
        $seq = $db->get_Seq_by_acc($identifier);
    } else {
        $seq = $db->get_Seq_by_id($identifier);
    }

    return $seq;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
518
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
519
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
520 =head2 translate
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
521
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
522 Title : translate
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
523 Usage : $seqobj = translate($seq_or_string_scalar)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
524
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
525 Function: translates a DNA sequence object OR just a plain
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
526 string of DNA to amino acids
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
527 Returns : A Bio::Seq object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
528
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
529 Args : Either a sequence object or a string of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
530 just DNA sequence characters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
531
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
532 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
533
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub translate {
    # Translate DNA to amino acids.
    #
    # Args    : a Bio::PrimarySeqI object, or a plain string of DNA letters
    # Returns : the result of calling translate() on the sequence object
    # Throws  : confess if given an object that is not a Bio::PrimarySeqI,
    #           or a string of which fewer than 85% of the characters are
    #           A, T, G, C or N (either case)
    my ($scalar) = shift;

    my $obj;

    if( ref $scalar ) {
        if( !$scalar->isa("Bio::PrimarySeqI") ) {
            confess("Expecting a sequence object not a $scalar");
        }
        $obj = $scalar;
    } else {

        # check this looks vaguely like DNA
        # tr with an empty replacement list only counts matches. The old
        # search list had no lowercase 'n', so lowercase 'n' was never
        # counted and such sequences could be wrongly rejected.
        my $n = ( $scalar =~ tr/ATGCNatgcn// );

        if( $n < length($scalar) * 0.85 ) {
            confess("Sequence [$scalar] is less than 85% ATGCN, which doesn't look very DNA to me");
        }

        $obj = Bio::PrimarySeq->new(-id => 'internalbioperlseq',-seq => $scalar);
    }

    return $obj->translate();
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
561
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
562
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
563 =head2 translate_as_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
564
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
565 Title : translate_as_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
566 Usage : $seqstring = translate_as_string($seq_or_string_scalar)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
567
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
568 Function: translates a DNA sequence object OR just a plain
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
569 string of DNA to amino acids
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
570 Returns : A string of just amino acids
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
571
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
572 Args : Either a sequence object or a string of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
573 just DNA sequence characters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
574
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
575 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
576
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub translate_as_string {
    # Same as Bio::Perl::translate, but returns the translated object's
    # seq() value instead of the object itself.
    my ($dna) = @_;

    return Bio::Perl::translate($dna)->seq;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
584
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
585
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
586 =head2 reverse_complement
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
587
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
588 Title : reverse_complement
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
589 Usage : $seqobj = reverse_complement($seq_or_string_scalar)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
590
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
591 Function: reverse complements a string or sequence argument
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
592 producing a Bio::Seq - if you want a string, you
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
593 can use reverse_complement_as_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
594 Returns : A Bio::Seq object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
595
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
596 Args : Either a sequence object or a string of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
597 just DNA sequence characters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
598
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
599 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
600
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub reverse_complement {
    # Reverse-complement a DNA sequence.
    #
    # Args    : a Bio::PrimarySeqI object, or a plain string of DNA letters
    # Returns : the result of calling revcom() on the sequence object
    # Throws  : confess if given an object that is not a Bio::PrimarySeqI,
    #           or a string of which fewer than 85% of the characters are
    #           A, T, G, C or N (either case)
    my ($scalar) = shift;

    my $obj;

    if( ref $scalar ) {
        if( !$scalar->isa("Bio::PrimarySeqI") ) {
            confess("Expecting a sequence object not a $scalar");
        }
        $obj = $scalar;
    } else {

        # check this looks vaguely like DNA
        # tr with an empty replacement list only counts matches. The old
        # search list had no lowercase 'n', so lowercase 'n' was never
        # counted and such sequences could be wrongly rejected.
        my $n = ( $scalar =~ tr/ATGCNatgcn// );

        if( $n < length($scalar) * 0.85 ) {
            confess("Sequence [$scalar] is less than 85% ATGCN, which doesn't look very DNA to me");
        }

        $obj = Bio::PrimarySeq->new(-id => 'internalbioperlseq',-seq => $scalar);
    }

    return $obj->revcom();
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
628
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
629 =head2 revcom
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
630
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
631 Title : revcom
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
632 Usage : $seqobj = revcom($seq_or_string_scalar)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
633
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
634 Function: reverse complements a string or sequence argument
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
635 producing a Bio::Seq - if you want a string, you
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
636 can use reverse_complement_as_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
637
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
638 This is an alias for reverse_complement
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
639 Returns : A Bio::Seq object
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
640
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
641 Args : Either a sequence object or a string of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
642 just DNA sequence characters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
643
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
644 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
645
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub revcom {
    # Short alias: pass every argument on to reverse_complement and
    # return its result unchanged.
    my @args = @_;
    return Bio::Perl::reverse_complement(@args);
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
649
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
650
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
651 =head2 reverse_complement_as_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
652
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
653 Title : reverse_complement_as_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
654 Usage : $string = reverse_complement_as_string($seq_or_string_scalar)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
655
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
656 Function: reverse complements a string or sequence argument
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
657 producing a string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
658 Returns : A string of DNA letters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
659
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
660 Args : Either a sequence object or a string of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
661 just DNA sequence characters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
662
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
663 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
664
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub reverse_complement_as_string {
    # Reverse-complement the argument via Bio::Perl::reverse_complement
    # and return the resulting object's seq() value.
    my ($dna) = @_;

    return Bio::Perl::reverse_complement($dna)->seq;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
672
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
673
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
674 =head2 revcom_as_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
675
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
676 Title : revcom_as_string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
677 Usage : $string = revcom_as_string($seq_or_string_scalar)
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
678
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
679 Function: reverse complements a string or sequence argument
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
680 producing a string
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
681 Returns : A string of DNA letters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
682
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
683 Args : Either a sequence object or a string of
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
684 just DNA sequence characters
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
685
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
686 =cut
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
687
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
sub revcom_as_string {
    # Short-named variant: reverse-complement the argument via
    # Bio::Perl::reverse_complement and return the object's seq() value.
    my ($dna) = @_;

    my $revcomped = Bio::Perl::reverse_complement($dna);
    return $revcomped->seq;
}
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
695
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
696
21066c0abaf5 Uploaded
willmclaren
parents:
diff changeset
697 1;