Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Pipeline/Base.pm @ 0:1f6dce3d34e0
Uploaded
author | mahtabm |
---|---|
date | Thu, 11 Apr 2013 02:01:53 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1f6dce3d34e0 |
---|---|
1 package Bio::EnsEMBL::Pipeline::Base; | |
2 | |
3 use strict; | |
4 use warnings; | |
5 use base qw/Bio::EnsEMBL::Hive::Process/; | |
6 | |
7 use Bio::EnsEMBL::Utils::Exception qw/throw/; | |
8 use Bio::EnsEMBL::Utils::IO qw/work_with_file/; | |
9 use Bio::EnsEMBL::Utils::Scalar qw/check_ref/; | |
10 use File::Find; | |
11 use File::Spec; | |
12 use File::Path qw/mkpath/; | |
13 use POSIX qw/strftime/; | |
14 | |
# Takes in a key, checks if the current $self->param() was an empty array
# and replaces it with the value from $self->param_defaults().
#
# Arg[1] : String; name of the parameter to inspect
#
# The replacement only happens when both the current value and the default
# stored under the same key are array references and the current array has
# no elements. Returns nothing.
sub reset_empty_array_param {
  my ($self, $key) = @_;
  # Fetch the defaults once and reuse them for the lookup below
  my $param_defaults = $self->param_defaults();
  my $current = $self->param($key);
  my $replacement = $param_defaults->{$key};
  if(check_ref($current, 'ARRAY') && check_ref($replacement, 'ARRAY')) {
    if(! @{$current}) {
      $self->fine('Resetting param %s because the given array was empty', $key);
      $self->param($key, $replacement);
    }
  }
  return;
}
30 | |
=head2 get_Slices

  Arg[1]      : String type of DB to use (defaults to core)
  Arg[2]      : Boolean should we filter the slices if it is human
  Example     : my $slices = $self->get_Slices('core', 1);
  Description : Basic get_Slices() method to return all distinct slices
                for a species but also optionally filters for the
                first portion of Human Y which is a non-informative region
                (composed solely of N's). The code will only filter for
                GRCh37 forcing the developer to update the test for other
                regions. The returned slices are sorted by ascending length.
  Returntype  : ArrayRef[Bio::EnsEMBL::Slice]
  Exceptions  : Thrown if no DB adaptor could be retrieved or if you are
                filtering Human but also are not on GRCh37

=cut

sub get_Slices {
  my ($self, $type, $filter_human) = @_;
  my $dba = $self->get_DBAdaptor($type);
  throw('Cannot get a DB adaptor') unless $dba;

  my $sa = $dba->get_SliceAdaptor();
  my @slices = @{$sa->fetch_all('toplevel', undef, 1, undef, undef)};

  if($filter_human) {
    my $production_name = $self->production_name();
    if($production_name eq 'homo_sapiens') {
      # Same accessor as assembly() uses; only the first coordinate system
      # returned is checked
      my ($cs) = @{$dba->get_CoordSystemAdaptor()->fetch_all()};
      my $expected = 'GRCh37';
      if($cs->version() ne $expected) {
        throw(sprintf(q{Cannot continue as %s's coordinate system %s is not the expected %s }, $production_name, $cs->version(), $expected));
      }
      # Drop any Y slice ending before position 2649521; the threshold is
      # tied to the assembly checked above
      @slices = grep {
        if($_->seq_region_name() eq 'Y' && $_->end() < 2649521) {
          $self->info('Filtering small Y slice');
          0;
        }
        else {
          1;
        }
      } @slices;
    }
  }

  return [ sort { $a->length() <=> $b->length() } @slices ];
}
77 | |
# Returns the DBAdaptor registered for the species held in the 'species'
# param. A false $type falls back to 'core'.
# Registry is loaded by Hive (see beekeeper_extra_cmdline_options() in conf)
sub get_DBAdaptor {
  my ($self, $type) = @_;
  my $species_name = $self->param('species');
  my $group = $type || 'core';
  return Bio::EnsEMBL::Registry->get_DBAdaptor($species_name, $group);
}
85 | |
# Clears the caches of the DBAdaptor for the given type and then asks its
# database connection to disconnect if idle. Returns nothing.
sub cleanup_DBAdaptor {
  my ($self, $type) = @_;
  my $adaptor = $self->get_DBAdaptor($type);
  $adaptor->clear_caches();
  my $connection = $adaptor->dbc();
  $connection->disconnect_if_idle();
  return;
}
93 | |
# Joins the 'base_path' param with any extra path elements, creates the
# resulting directory via mkpath() and returns its path.
sub get_dir {
  my ($self, @extras) = @_;
  # scalar() keeps the param lookup in scalar context inside the argument list
  my $target = File::Spec->catdir(scalar($self->param('base_path')), @extras);
  mkpath($target);
  return $target;
}
101 | |
# Returns the production name with its first character upper-cased.
# The lookup of the 'species.url' meta key is currently disabled; the
# commented-out lines below are kept so it can be changed back.
sub web_name {
  my ($self) = @_;
#  my $mc = $self->get_DBAdaptor()->get_MetaContainer();
#  my $name = $mc->single_value_by_key('species.url'); # change back
  return ucfirst($self->production_name());
}
109 | |
# Returns the scientific name reported by the MetaContainer of the default
# DBAdaptor. The connection is asked to disconnect if idle before returning.
sub scientific_name {
  my ($self) = @_;
  my $dba = $self->get_DBAdaptor();
  my $scientific = $dba->get_MetaContainer()->get_scientific_name();
  $dba->dbc()->disconnect_if_idle();
  return $scientific;
}
118 | |
# Returns the version of the first coordinate system returned by the
# default DBAdaptor's CoordSystemAdaptor.
sub assembly {
  my ($self) = @_;
  my $csa = $self->get_DBAdaptor()->get_CoordSystemAdaptor();
  my $coord_systems = $csa->fetch_all();
  return $coord_systems->[0]->version();
}
124 | |
# Returns the production name held in the MetaContainer.
#
# Arg[1] : String; optional species name. When true the 'core' DBAdaptor for
#          that name is taken from the Registry, otherwise the default
#          adaptor from get_DBAdaptor() is used.
#
# The connection is asked to disconnect if idle before returning.
sub production_name {
  my ($self, $name) = @_;
  my $dba = $name
    ? Bio::EnsEMBL::Registry->get_DBAdaptor($name, 'core')
    : $self->get_DBAdaptor();
  my $prod = $dba->get_MetaContainer()->get_production_name();
  $dba->dbc()->disconnect_if_idle();
  return $prod;
}
139 | |
# Closes file handle, and deletes the file stub if no data was written to
# the file handle (using tell). We can also only close a file handle and unlink
# the data if it was open otherwise we just ignore it.
#
# Arg[1] : File handle object to close
# Arg[2] : String; path of the file behind the handle
#
# Returns 1 only if the file was actually deleted and 0 in every other case
# (handle not open, data was written, path is not a file or unlink failed).
sub tidy_file_handle {
  my ($self, $fh, $path) = @_;
  return 0 unless $fh->opened();

  # The position must be read before closing; a zero offset means nothing
  # was written through this handle
  my $is_empty = ($fh->tell() == 0) ? 1 : 0;
  $fh->close();

  if($is_empty && defined $path && -f $path) {
    # Report the real outcome of unlink rather than assuming it worked
    return unlink($path) ? 1 : 0;
  }
  return 0;
}
157 | |
# Prints an INFO line to STDERR when $self->debug() is greater than 1.
#
# Arg[1] : String; message, used as a sprintf format when further
#          arguments are given
# Arg[n] : Values for the format
#
# Each line carries the result of _memory_consumption() and a timestamp.
sub info {
  my ($self, $msg, @params) = @_;
  return unless $self->debug() > 1;
  # Only run sprintf when there are values, so a lone message is used as-is
  my $text = scalar(@params) ? sprintf($msg, @params) : $msg;
  printf STDERR "INFO [%s]: %s %s\n", $self->_memory_consumption(), strftime('%c',localtime()), $text;
  return;
}
172 | |
# Prints a FINE line to STDERR when $self->debug() is greater than 2.
#
# Arg[1] : String; message, used as a sprintf format when further
#          arguments are given
# Arg[n] : Values for the format
#
# Each line carries the result of _memory_consumption() and a timestamp.
sub fine {
  my ($self, $msg, @params) = @_;
  return unless $self->debug() > 2;
  # Only run sprintf when there are values, so a lone message is used as-is
  my $text = scalar(@params) ? sprintf($msg, @params) : $msg;
  printf STDERR "FINE [%s]: %s %s\n", $self->_memory_consumption(), strftime('%c',localtime()), $text;
  return;
}
187 | |
# Reports the RSS value that `ps` gives for the current process, divided by
# 1024 and formatted as a string such as '12.34MB'.
# NOTE(review): assumes ps reports RSS in kilobytes - confirm per platform.
#
# Returns '?MB' when the command pipeline fails or its output is not a
# plain number.
sub _memory_consumption {
  my ($self) = @_;
  my $content = `ps -o rss $$ | grep -v RSS`;
  # Any non-zero status (exit code, signal or failure to run) means no value
  return q{?MB} if $? != 0;
  $content =~ s/\s+//g;
  # Guard the division against empty or non-numeric output
  return q{?MB} unless $content =~ /\A\d+\z/;
  return sprintf('%.2fMB', $content / 1024);
}
196 | |
# Walks $dir with File::Find and collects the full path of every entry for
# which the callback returns true.
#
# Arg[1] : String; directory to search
# Arg[2] : CodeRef; called with the current entry name ($_ as set by
#          File::Find) and expected to return a boolean
#
# Returns an ArrayRef of matching paths. Throws if $dir is not a directory.
sub find_files {
  my ($self, $dir, $boolean_callback) = @_;
  if(! -d $dir) {
    $self->throw("Cannot find path $dir");
  }
  my @matches;
  my $wanted = sub {
    my $full_path = $File::Find::name;
    push(@matches, $full_path) if $boolean_callback->($_);
  };
  find($wanted, $dir);
  return \@matches;
}
209 | |
# Removes every non-directory entry found below $dir.
#
# Arg[1] : String; directory to clean
#
# Entries are collected with find_files(). Real directories are skipped
# because unlink() cannot remove them; symbolic links are still removed.
# The final message reports how many entries were actually unlinked.
# Returns nothing.
sub unlink_all_files {
  my ($self, $dir) = @_;
  $self->info('Removing files from the directory %s', $dir);
  #Select anything which is not the current or higher directory
  my $boolean_callback = sub {
    return ( $_[0] =~ /^\.\.?$/) ? 0 : 1;
  };
  my $files = $self->find_files($dir, $boolean_callback);
  my $removed = 0;
  foreach my $file (@{$files}) {
    # -l is checked first so a symlink pointing at a directory is not skipped
    next if ! -l $file && -d $file;
    $self->fine('Unlinking %s', $file);
    if(unlink $file) {
      $removed++;
    }
    else {
      $self->info('Could not unlink %s: %s', $file, $!);
    }
  }
  $self->info('Removed %d file(s)', $removed);
  return;
}
225 | |
# Checks that an executable can be located.
#
# Arg[1] : String; path or name of the executable
#
# Returns 1 when $exe passes -x, or when either `which` or `locate -l 1`
# exits successfully for it. Throws (via $self->throw) when all three fail.
sub assert_executable {
  my ($self, $exe) = @_;
  return 1 if -x $exe;

  # Single-quote the name so spaces and shell metacharacters in it are
  # passed to the commands literally rather than interpreted by the shell
  (my $quoted = $exe) =~ s/'/'\\''/g;
  $quoted = "'$quoted'";

  # Only the exit codes matter here, so the command output is discarded
  system("which $quoted >/dev/null 2>&1");
  return 1 if ($? >> 8) == 0;

  system("locate -l 1 $quoted >/dev/null 2>&1");
  if(($? >> 8) != 0) {
    my $msg = 'Cannot find the executable "%s" after trying "which" and "locate -l 1". Please ensure it is on your PATH or use an absolute location and try again';
    $self->throw(sprintf($msg, $exe));
  }
  return 1;
}
243 | |
244 1; |