0
|
1
|
|
2 =head1 LICENSE
|
|
3
|
|
4 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
5 Genome Research Limited. All rights reserved.
|
|
6
|
|
7 This software is distributed under a modified Apache license.
|
|
8 For license details, please see
|
|
9
|
|
10 http://www.ensembl.org/info/about/code_licence.html
|
|
11
|
|
12 =head1 CONTACT
|
|
13
|
|
14 Please email comments or questions to the public Ensembl
|
|
15 developers list at <ensembl-dev@ebi.ac.uk>.
|
|
16
|
|
17 Questions may also be sent to the Ensembl help desk at
|
|
18 <helpdesk@ensembl.org>.
|
|
19
|
|
20 =head1 NAME
|
|
21
|
|
22 Bio::EnsEMBL::Funcgen::Importer
|
|
23
|
|
24 =head1 SYNOPSIS
|
|
25
|
|
26 my $imp = Bio::EnsEMBL::Funcgen::Importer->new(%params);
|
|
27 $imp->register_experiment();
|
|
28
|
|
29
|
|
30 =head1 DESCRIPTION
|
|
31
|
|
32 B<This program> is the main class coordinating import of tiling array design and experimental data.
|
|
33 It utilises several underlying parser classes specific to array vendor or import file type.
|
|
34
|
|
35 =cut
|
|
36
|
|
37 ################################################################################
|
|
38
|
|
39 package Bio::EnsEMBL::Funcgen::Importer;
|
|
40
|
|
41 use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw(get_date open_file run_system_cmd);
|
|
42 use Bio::EnsEMBL::Utils::Exception qw( throw deprecate );
|
|
43 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
|
|
44 use Bio::EnsEMBL::Funcgen::Experiment;
|
|
45 use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor;
|
|
46 use Bio::EnsEMBL::DBSQL::DBAdaptor;
|
|
47
|
|
48 use File::Path;
|
|
49 use strict;
|
|
50 use vars qw(@ISA);
|
|
51
|
|
52
|
|
53 ################################################################################
|
|
54
|
|
55 =head2 new
|
|
56
|
|
57 Description : Constructor method
|
|
58
|
|
59 Arg [1] : hash containing optional attributes:
|
|
60 -name Name of Experiment(dir)
|
|
61 -format of array e.g. Tiled(default)
|
|
62 -vendor name of array vendor
|
|
63 -description of the experiment
|
|
64 -pass DB password
|
|
65 -host DB host
|
|
66 -user DB user
|
|
67 -port DB port
|
|
68 -registry_host Host to load registry from
|
|
69 -registry_port Port for registry host
|
|
70 -registry_user User for registry host
|
|
71 -registry_pass Password for registry user
|
|
72 -ssh Flag to set connection over ssh via forwarded port to localhost (default = 0); remove?
|
|
73 -group name of experimental/research group
|
|
74 ` -location of experimental/research group
|
|
75 -contact e/mail address of primary contact for experimental group
|
|
76 -species
|
|
77 -assembly Genome assembly version i.e. 36 for NCBI36
|
|
78 -recover Recovery flag (default = 0)
|
|
79 -data_dir Root data directory (default = $ENV{'EFG_DATA'})
|
|
80 -output_dir review these dirs ???????
|
|
81 -input_dir ?????????
|
|
82 -import_dir ???????
|
|
83 -norm_dir ??????
|
|
84 -fasta dump FASTA flag (default =0)
|
|
85 -array_set Flag to treat all chip designs as part of same array (default = 0)
|
|
86 -array_name Name for array set
|
|
87 -array_file Path of array file to import for sanger ENCODE array
|
|
88 -result_set_name Name to give the raw and normalised result sets (default uses experiment and analysis name)
|
|
89 -norm_method Normalisation method (Nimblegen default = VSN_GLOG);
|
|
90 -dbname Override for autogeneration of funcgen dbaname
|
|
91 -reg_config path to local registry config file (default = ~/ensembl.init || undef)
|
|
92 -design_type MGED term (default = binding_site_identification) get from meta/MAGE?
|
|
93
|
|
94 -farm Flag to submit jobs to farm e.g. normalisation jobs
|
|
95 -batch_job Flag to signify that this Importer is running as a prepared batch/farm job
|
|
96 -prepared Flag to signify result files have been previously imported in prepare mode
|
|
97 and file names will differ to those record in InputSubset
|
|
98
|
|
99
|
|
100 #-use_defaults This changes some mandatory parameters to optional, instead using either DEFAULT or the input file name for the following options -name, -input_set, -feature_type, -cell_type etc ???
|
|
101
|
|
102 -verbose
|
|
103 ReturnType : Bio::EnsEMBL::Funcgen::Importer
|
|
104 Example : my $Exp = Bio::EnsEMBL::Importer->new(%params);
|
|
105 Exceptions : throws if mandatory params are not set or DB connect fails
|
|
106 Caller : General
|
|
107 Status : Medium - potential for %params names to change, remove %attrdata?
|
|
108
|
|
109 =cut
|
|
110
|
|
111 ################################################################################
|
|
112
|
|
113 sub new{
|
|
114 my ($caller) = shift;
|
|
115
|
|
116 #my $reg = "Bio::EnsEMBL::Registry";
|
|
117 my $class = ref($caller) || $caller;
|
|
118
|
|
119 #$user, $host, $port, $pass, $dbname, $db, $assm_version, $reg_config, $reg_db, $ucsc_coords, $verbose, $release, $reg_host, $reg_port, $reg_user, $reg_pass
|
|
120 #$ftype_name, $ctype_name,
|
|
121
|
|
122 my ($name, $format, $vendor, $group, $location, $contact,
|
|
123 $array_name, $array_set, $array_file, $data_dir, $result_files,
|
|
124 $exp_date, $desc, $design_type, $output_dir, $input_dir,
|
|
125 $batch_job, $farm, $prepared,
|
|
126 $norm_method, $old_dvd_format, $parser_type)
|
|
127 = rearrange(['NAME', 'FORMAT', 'VENDOR', 'GROUP', 'LOCATION', 'CONTACT',
|
|
128 'ARRAY_NAME', 'ARRAY_SET', 'ARRAY_FILE', 'DATA_DIR', 'RESULT_FILES',
|
|
129 'EXPERIMENT_DATE', 'DESCRIPTION',
|
|
130 'DESIGN_TYPE', 'OUTPUT_DIR', 'INPUT_DIR', #to allow override of defaults
|
|
131 'BATCH_JOB', 'FARM', 'PREPARED', 'NORM_METHOD',
|
|
132 'OLD_DVD_FORMAT', 'PARSER'], @_);
|
|
133
|
|
134
|
|
135
|
|
136 #### Define parent parser class based on vendor
|
|
137 throw("Mandatory argument -vendor not defined") if ! defined $vendor;
|
|
138
|
|
139 #This will override the default Vendor Parser type
|
|
140 #Evals simply protect from messy errors if parser type not found
|
|
141 my $parser_error;
|
|
142 my $vendor_parser = ucfirst(lc($vendor));
|
|
143
|
|
144
|
|
145 #WARNING evaling these parsers to enable pluggability hides errors in parser
|
|
146 #use a perl -MBio::EnsEMBL::Funcgen::Parsers:ParserType to debug
|
|
147 #get rid of all this case guessing and force correct parser name usage?
|
|
148
|
|
149
|
|
150 #WARNING
|
|
151 #Dynamic setting of ISA in this way reports the resultant object as Importer, when
|
|
152 #some throws/methods are actually in other base/custom Parsers
|
|
153 #This can seem a little counterintuitive, but allows plugability
|
|
154 #With out the need for separate control scripts
|
|
155
|
|
156
|
|
157 #Change this to be set and required/detected in the parse_and_import.pl script
|
|
158 #Then we can have Importer.pm as the base class and get rid of this.
|
|
159 #as well as set_config methods?
|
|
160
|
|
161
|
|
162 eval {require "Bio/EnsEMBL/Funcgen/Parsers/${vendor_parser}.pm";};
|
|
163
|
|
164 if ($@) {
|
|
165 #Don't warn/throw yet as we might have a standard parser format
|
|
166
|
|
167 $parser_error .= "There is no valid parser for the vendor your have specified:\t".$vendor.
|
|
168 "\nMaybe this is a typo or maybe you want to specify a default import format using the -parser option\n".$@;
|
|
169 }
|
|
170
|
|
171
|
|
172
|
|
173 if (defined $parser_type) {
|
|
174
|
|
175 #try normal case first
|
|
176 eval {require "Bio/EnsEMBL/Funcgen/Parsers/${parser_type}.pm";};
|
|
177
|
|
178 if ($@) {
|
|
179 $parser_type = ucfirst(lc($parser_type));
|
|
180
|
|
181 #Now eval the new parser
|
|
182 eval {require "Bio/EnsEMBL/Funcgen/Parsers/${parser_type}.pm";};
|
|
183
|
|
184 if ($@) {
|
|
185
|
|
186 #Might be no default
|
|
187 my $txt = "There is no valid parser for the -parser format your have specified:\t".$parser_type."\n";
|
|
188
|
|
189 if (! $parser_error) {
|
|
190 $txt .= "Maybe this is a typo or maybe you want run with the default $vendor_parser parser\n";
|
|
191 }
|
|
192
|
|
193 throw($txt.$@);
|
|
194 }
|
|
195
|
|
196 #warn about over riding vendor parser here
|
|
197 if (! $parser_error) {
|
|
198 #Can't log this as we haven't blessed the Helper yet
|
|
199 warn("WARNING\t::\tYou are over-riding the default ".$vendor." parser with -parser ".$parser_type);
|
|
200 }
|
|
201 }
|
|
202 } else {
|
|
203 throw($parser_error) if $parser_error;
|
|
204 $parser_type = $vendor_parser;
|
|
205 }
|
|
206
|
|
207
|
|
208 #we should now really set parser_type as an attrtibute?
|
|
209 unshift @ISA, 'Bio::EnsEMBL::Funcgen::Parsers::'.$parser_type;
|
|
210 #change this to be called explicitly from the load script?
|
|
211
|
|
212 #### Create object from parent class
|
|
213
|
|
214 my $self = $class->SUPER::new(@_);
|
|
215
|
|
216 #### Set vars and test minimum mandatory params for any import type
|
|
217
|
|
218 $self->{'name'} = $name || throw('Mandatory param -name not met'); #This is not mandatory for array design import
|
|
219 ## $self->{'user'} = $user || $ENV{'EFG_WRITE_USER'};
|
|
220 $self->vendor(uc($vendor)); #already tested
|
|
221 $self->{'format'} = uc($format) || 'TILED'; #remove default?
|
|
222 $self->group($group) if $group;
|
|
223 $self->location($location) if $location;
|
|
224 $self->contact($contact) if $contact;
|
|
225 ## $species || throw('Mandatory param -species not met');
|
|
226 $self->array_name($array_name) if $array_name;
|
|
227 $self->array_set($array_set) if $array_set;
|
|
228 $self->array_file($array_file) if $array_file;
|
|
229 $self->{'data_dir'} = $data_dir || $ENV{'EFG_DATA'};
|
|
230 $self->result_files($result_files)if $result_files;
|
|
231 $self->experiment_date($exp_date) if $exp_date;
|
|
232 $self->description($desc) if $desc; #experiment
|
|
233 ## $self->feature_set_description($fset_desc) if $fset_desc;
|
|
234
|
|
235 #$assm_version || throw('Mandatory param -assembly not met');
|
|
236 #Only required if setting DB by params e.g db not passed or generated from reg
|
|
237 #i.e. most of the time
|
|
238 #Why was this made mandatory?
|
|
239 #Default to dnadb||efgdb assm from the dbname
|
|
240
|
|
241 $self->{'design_type'} = $design_type || 'binding_site_identification'; #remove default?
|
|
242 $self->{'output_dir'} = $output_dir if $output_dir; #config default override
|
|
243 $self->{'input_dir'} = $input_dir if $input_dir; #config default override
|
|
244 $self->farm($farm) if $farm;
|
|
245 $self->batch_job($batch_job);
|
|
246 $self->prepared($prepared);
|
|
247 ## $self->{'ssh'} = $ssh || 0;
|
|
248 ## $self->{'_dump_fasta'} = $fasta || 0;
|
|
249 #$self->{'recover'} = $recover || 0; Now in BaseImporter
|
|
250 #check for ~/.ensembl_init to mirror general EnsEMBL behaviour
|
|
251 ## $self->{'reg_config'} = $reg_config || ((-f "$ENV{'HOME'}/.ensembl_init") ? "$ENV{'HOME'}/.ensembl_init" : undef);
|
|
252 $self->{'old_dvd_format'} = $old_dvd_format || 0;
|
|
253 ## $self->{'ucsc_coords'} = $ucsc_coords || 0;
|
|
254 ## $self->{'verbose'} = $verbose || 0;
|
|
255 ## $self->{'release'} = $release;
|
|
256
|
|
257
|
|
258
|
|
259 ## if($reg_host && $self->{'reg_config'}){
|
|
260 ## warn "You have specified registry parameters and a config file:\t".$self->{'reg_config'}.
|
|
261 ## "\nOver-riding config file with specified paramters:\t${reg_user}@${reg_host}:$reg_port";
|
|
262 ## }
|
|
263
|
|
264
|
|
265 #Will a general norm method be applicable for all imports?
|
|
266 #Already casued problems with Bed imports... remove?
|
|
267 #Could set NORM_METHOD in Parser!!
|
|
268 #warn "Need to fully implement norm_method is validate_mage, remove ENV NORM_METHOD?";
|
|
269 $self->{'norm_method'} = $norm_method; # || $ENV{'NORM_METHOD'};
|
|
270
|
|
271 #if ($self->vendor ne 'NIMBLEGEN'){
|
|
272 # $self->{'no_mage'} = 1;
|
|
273 # warn "Hardcoding no_mage for non-NIMBLEGEN imports";
|
|
274 # }
|
|
275
|
|
276
|
|
277 # if($self->{'no_mage'} && $self->{'write_mage'}){
|
|
278 # throw('-no_mage and -write_mage options are mutually exclusive, please select just one');
|
|
279 # }
|
|
280
|
|
281 ## #### Set up DBs and load and reconfig registry
|
|
282 ##
|
|
283 ## ### Load Registry
|
|
284 ## #Can we load the registry using the assembly version, then just redefine the efg DB?
|
|
285 ## #We have problems here if we try and load on a dev version, where no dev DBs are available on ensembldb
|
|
286 ## #Get the latest API version for the assembly we want to use
|
|
287 ## #Then load the registry from that version
|
|
288 ## #Then we can remove some of the dnadb setting code below?
|
|
289 ## #This may cause problems with API schema mismatches
|
|
290 ## #Can we just test whether the current default dnadb contains the assembly?
|
|
291 ## #Problem with this is that this will not have any other data e.g. genes etc
|
|
292 ## #which may be required for some parsers
|
|
293 ##
|
|
294 ## #How does the registry pick up the schema version??
|
|
295 ##
|
|
296 ## #We should really load the registry first given the dnadb assembly version
|
|
297 ## #Then reset the eFG DB as appropriate
|
|
298 ##
|
|
299 ##
|
|
300 ## if ($reg_host || ! defined $self->{'_reg_config'}) {
|
|
301 ## #defaults to current ensembl DBs
|
|
302 ## $reg_host ||= 'ensembldb.ensembl.org';
|
|
303 ## $reg_user ||= 'anonymous';
|
|
304 ##
|
|
305 ## #Default to the most recent port for ensdb
|
|
306 ## if( (! $reg_port) &&
|
|
307 ## ($reg_host eq 'ensdb-archive') ){
|
|
308 ## $reg_port = 5304;
|
|
309 ## }
|
|
310 ##
|
|
311 ## #This will try and load the dev DBs if we are using v49 schema or API?
|
|
312 ## #Need to be mindful about this when developing
|
|
313 ## #we need to tip all this on it's head and load the reg from the dnadb version!!!!!!!
|
|
314 ##
|
|
315 ## my $version_text= ($self->{'release'}) ? 'version '.$self->{'release'} : 'current version';
|
|
316 ## $self->log("Loading $version_text registry from $reg_user".'@'.$reg_host);
|
|
317 ##
|
|
318 ## #Note this defaults API version, hence running with head code
|
|
319 ## #And not specifying a release version will find not head version
|
|
320 ## #DBs on ensembldb, resulting in an exception from reset_DBAdaptor below
|
|
321 ## $reg->load_registry_from_db(
|
|
322 ## -host => $reg_host,
|
|
323 ## -user => $reg_user,
|
|
324 ## -port => $reg_port,
|
|
325 ## -pass => $reg_pass,
|
|
326 ## #-host => "ens-staging",
|
|
327 ## #-user => 'ensro',
|
|
328 ## -db_version => $self->{'release'},#51
|
|
329 ## -verbose => $self->verbose,
|
|
330 ## );
|
|
331 ##
|
|
332 ## throw('Not sensible to set the import DB as the default eFG DB from '.$reg_host.', please define db params') if ((! $dbname) && (! $db));
|
|
333 ## }
|
|
334 ## else{
|
|
335 ## $self->log("Loading registry from:\t".$self->{'_reg_config'});
|
|
336 ## $reg->load_all($self->{'_reg_config'}, 1);
|
|
337 ## }
|
|
338 ##
|
|
339 ##
|
|
340 ## #Need to test the DBs here, as we may not have loaded any!
|
|
341 ## #get_alias wil fail otherwise?
|
|
342 ## #This is a cyclical dependancy as we need alias to get species which we use to grab the DB
|
|
343 ## #alias is dependant on core DB being loaded with relevant meta entries.
|
|
344 ## #revise this when we split the Importer
|
|
345 ##
|
|
346 ##
|
|
347 ## #Validate species
|
|
348 ## my $alias = $reg->get_alias($species) || throw("Could not find valid species alias for $species\nYou might want to clean up:\t".$self->get_dir('output'));
|
|
349 ## $self->species($alias);
|
|
350 ## $self->{'param_species'} = $species;#Only used for dir generation
|
|
351 ##
|
|
352 ##
|
|
353 ## #SET UP DBs
|
|
354 ## if($db){
|
|
355 ## #db will have been defined before reg loaded, so will be present in reg
|
|
356 ##
|
|
357 ## if(! (ref($db) && $db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'))){
|
|
358 ## $self->throw('-db must be a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor');
|
|
359 ## }
|
|
360 ## }
|
|
361 ## else{ #define eFG DB from params or registry
|
|
362 ##
|
|
363 ## if($reg_db){#load eFG DB from reg
|
|
364 ##
|
|
365 ## if($dbname){
|
|
366 ## throw("You cannot specify DB params($dbname) and load from the registry at the same time.");
|
|
367 ## }
|
|
368 ##
|
|
369 ## $self->log('WARNING: Loading eFG DB from Registry');
|
|
370 ## $db = $reg->get_DBAdaptor($self->species(), 'funcgen');
|
|
371 ## throw("Unable to retrieve ".$self->species." funcgen DB from the registry") if ! $db;
|
|
372 ## }
|
|
373 ## else{#resets the eFG DB in the custom or generic registry
|
|
374 ##
|
|
375 ## $dbname || throw('Must provide a -dbname if not using default custom registry config');
|
|
376 ## #$user || throw('Must provide a -user parameter');#make this default to EFG_WRITE_USER?
|
|
377 ## $pass || throw('Must provide a -pass parameter');
|
|
378 ##
|
|
379 ## #remove this and throw?
|
|
380 ## if(! defined $host){
|
|
381 ## $self->log('WARNING: Defaulting to localhost');
|
|
382 ## $host = 'localhost';
|
|
383 ## }
|
|
384 ##
|
|
385 ## $port ||= 3306;
|
|
386 ## my $host_ip = '127.0.0.1';#is this valid for all localhosts?
|
|
387 ##
|
|
388 ## if ($self->{'ssh'}) {
|
|
389 ## $host = `host localhost`; #mac specific? nslookup localhost wont work on server/non-PC
|
|
390 ## #will this always be the same?
|
|
391 ##
|
|
392 ## if (! (exists $ENV{'EFG_HOST_IP'})) {
|
|
393 ## warn "Environment variable EFG_HOST_IP not set for ssh mode, defaulting to $host_ip for $host";
|
|
394 ## } else {
|
|
395 ## $host_ip = $ENV{'EFG_HOST_IP'};
|
|
396 ## }
|
|
397 ##
|
|
398 ## if ($self->host() ne 'localhost') {
|
|
399 ## warn "Overriding host ".$self->host()." for ssh connection via localhost($host_ip)";
|
|
400 ## }
|
|
401 ## }
|
|
402 ##
|
|
403 ##
|
|
404 ## #data version is only used if we don't want to define the dbname
|
|
405 ## #This should never be guessed so don't need data_version here
|
|
406 ## #$dbname ||= $self->species()."_funcgen_".$self->data_version();
|
|
407 ##
|
|
408 ##
|
|
409 ## #Remove block below when we can
|
|
410 ## my $dbhost = ($self->{'ssh'}) ? $host_ip : $host;
|
|
411 ##
|
|
412 ## #This isn't set yet!?
|
|
413 ## #When we try to load, say 49, when we only have 48 on ensembldb
|
|
414 ## #This fails because there is not DB set for v49, as it is not on ensembl DB
|
|
415 ## #In this case we need to load from the previous version
|
|
416 ## #Trap this and suggest using the -schema_version/release option
|
|
417 ## #Can we autodetect this and reload the registry?
|
|
418 ## #We want to reload the registry anyway with the right version corresponding to the dnadb
|
|
419 ## #We could either test for the db in the registry or just pass the class.
|
|
420 ##
|
|
421 ## $db = $reg->reset_DBAdaptor($self->species(), 'funcgen', $dbname, $dbhost, $port, $self->user, $pass,
|
|
422 ## {
|
|
423 ## -dnadb_host => $reg_host,
|
|
424 ## -dnadb_port => $reg_port,
|
|
425 ## -dnadb_assembly => $assm_version,
|
|
426 ## -dnadb_user => $reg_user,
|
|
427 ## -dnadb_pass => $reg_pass,
|
|
428 ## });
|
|
429 ##
|
|
430 ##
|
|
431 ## #ConfigRegistry will try and set this
|
|
432 ## #This will fail if there is already one in the registry as it will try
|
|
433 ## #and defined a new unique species so as not to overwrite the original
|
|
434 ## #e.g. homo_sapiens1
|
|
435 ##
|
|
436 ## #This is why it was orignally written backwards as we can't easily dynamically redefine
|
|
437 ## #an adaptor in the registry without ConfigRegistry trying to change the name
|
|
438 ## #the very act of creating a new db to redefine the registry with causes ConfigRegistry
|
|
439 ## #to try and register it with a unique species name
|
|
440 ##
|
|
441 ## #Delete the old funcgen DB from the registry first
|
|
442 ## #$reg->remove_DBAdaptor($self->species, 'funcgen');
|
|
443 ##
|
|
444 ## #ConfigRegistry will automatically configure this new db
|
|
445 ##
|
|
446 ## #$db = Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new(
|
|
447 ## # -user => $user,
|
|
448 ## # -host => ($self->{'ssh'}) ? $host_ip : $host,
|
|
449 ## # -port => $port,
|
|
450 ## # -pass => $pass,
|
|
451 ## # #we need to pass dbname else we can use non-standard dbs
|
|
452 ## # -dbname => $dbname,
|
|
453 ## # -species => $self->species(),
|
|
454 ## # -group => 'funcgen',
|
|
455 ## # );
|
|
456 ##
|
|
457 ##
|
|
458 ## #if we get a species like homo_sapiens1 here
|
|
459 ## #This is because ConfigRegistry is try to make the dbname different between the
|
|
460 ## #one already present and the one you're trying to add
|
|
461 ## }
|
|
462 ## }
|
|
463 ##
|
|
464 ##
|
|
465 ## ### VALIDATE DNADB
|
|
466 ## #This is now done in DBAdaptor
|
|
467 ##
|
|
468 ## #We can change this to just use the assembly version
|
|
469 ## #we could even have the wordy assmelby version from the meta table
|
|
470 ## #do the standard ensembl subs
|
|
471 ## #s/[A-Za-z]//g;
|
|
472 ## #s/\.//g;
|
|
473 ## #And then validate?
|
|
474 ## #Just stick to number version for now.
|
|
475 ##
|
|
476 ##
|
|
477 ## #Now we need to set the dnadb_host params to avoid ensembldb defaults
|
|
478 ## #This should check the registry first
|
|
479 ## #Then load from the registry db?
|
|
480 ## #If we have a multi host registry config file this isn't going to work!
|
|
481 ##
|
|
482 ## #Is this required anymore as the DBAdaptor handles this?
|
|
483 ## #Not if we pass a db with an incorrect dnadb attached.
|
|
484 ##
|
|
485 ## #if($db->_get_schema_build($db->dnadb()) !~ /_[0-9]+_${assm_version}[a-z]*$/){
|
|
486 ### my $warning = "WARNING: dnadb does not match assembly_version $assm_version. Using ensembldb.enembl.org to define the dnadb";
|
|
487 ### $warning .= ' rather than the reg_config' if (defined $self->{'_reg_config'});
|
|
488 ##
|
|
489 ## #We need to account for reg_config DBs which may have custom info in
|
|
490 ## #So try reg_config host first, then try ensembldb with warning
|
|
491 ## #Could have a reg_config only flag for core dbs
|
|
492 ## #Need to implement more params in set_dnadb_by_assembly_version
|
|
493 ### $self->log($warning);
|
|
494 ##
|
|
495 ### $db->set_dnadb_by_assembly_version($assm_version);
|
|
496 ### }
|
|
497 ##
|
|
498 ##
|
|
499 ##
|
|
500 ##
|
|
501 ## #Test connections
|
|
502 ## $self->db($db);
|
|
503 ## $db->dbc->db_handle;
|
|
504 ## $db->dnadb->dbc->db_handle;
|
|
505 ## #Set re/disconnect options
|
|
506 ##
|
|
507 ## #These really need setting dependant on the import parser
|
|
508 ## $db->dbc->disconnect_when_inactive(1);
|
|
509 ## $db->dnadb->dbc->disconnect_when_inactive(1);
|
|
510
|
|
511
|
|
512
|
|
513 ## ### Check analyses/feature_type/cell_type
|
|
514 ## if($feature_analysis){
|
|
515 ## my $fanal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($feature_analysis);
|
|
516 ## throw("The Feature Analysis $feature_analysis does not exist in the database") if(!$fanal);
|
|
517 ## $self->feature_analysis($fanal);
|
|
518 ##
|
|
519 ## #This currently fails before the config gets loaded!
|
|
520 ## #Need to load config before this validation!
|
|
521 ## }
|
|
522 ##
|
|
523 ## if($ctype_name){
|
|
524 ## my $ctype = $self->db->get_CellTypeAdaptor->fetch_by_name($ctype_name);
|
|
525 ## throw("The CellType $ctype_name does not exist in the database") if(!$ctype);
|
|
526 ## $self->cell_type($ctype);
|
|
527 ## }
|
|
528 ##
|
|
529 ## if ($ftype_name) {
|
|
530 ## my $ftype = $self->db->get_FeatureTypeAdaptor->fetch_by_name($ftype_name);
|
|
531 ## throw("The FeatureType $ftype_name does not exist in the database") if(!$ftype);
|
|
532 ## $self->feature_type($ftype);
|
|
533 ## }
|
|
534
|
|
535
|
|
536 #Set config here instead?
|
|
537 #So we can check all mandatory params
|
|
538 #Set vendor specific attr dependent vars
|
|
539
|
|
540 #Generic input dir
|
|
541 $self->{'input_dir'} ||= $self->get_dir("data").'/input/'.$self->{'param_species'}.'/'.$self->vendor().'/'.$self->name();
|
|
542
|
|
543 if (! -d $self->get_dir('input')) {
|
|
544
|
|
545 if (@{$self->result_files}) {
|
|
546 #This is really InputSet specific
|
|
547 #Could go in init_experiment_import
|
|
548 $self->log("Processing files:\n\t\t".join("\n\t\t",@{$self->result_files}));
|
|
549 } else {
|
|
550 throw('input_dir is not defined or does not exist ('.$self->get_dir('input').')');
|
|
551 }
|
|
552 }
|
|
553
|
|
554 #Parser specific config
|
|
555 $self->set_config();
|
|
556
|
|
557
|
|
558 $self->debug(2, "Importer class instance created.");
|
|
559 $self->debug_hash(3, \$self);
|
|
560
|
|
561 return ($self);
|
|
562 }
|
|
563
|
|
564 =head2 registry_host
|
|
565
|
|
566 Example : my $reg_host = $imp->registry_host;
|
|
567 Description: Accessor for registry host attribute
|
|
568 Returntype : string e.g. ensembldb.ensembl.org
|
|
569 Exceptions : None
|
|
570 Caller : general
|
|
571 Status : at risk
|
|
572
|
|
573 =cut
|
|
574
|
|
575 sub registry_host{
|
|
576 return $_[0]->{'reg_host'};
|
|
577 }
|
|
578
|
|
579 =head2 registry_user
|
|
580
|
|
581 Example : my $reg_user = $imp->registry_user;
|
|
582 Description: Accessor for registry user attribute
|
|
583 Returntype : string e.g. ensembldb.ensembl.org
|
|
584 Exceptions : None
|
|
585 Caller : general
|
|
586 Status : at risk
|
|
587
|
|
588 =cut
|
|
589
|
|
590 sub registry_user{
|
|
591 return $_[0]->{'reg_user'};
|
|
592 }
|
|
593
|
|
594 =head2 registry_port
|
|
595
|
|
596 Example : my $reg_port = $imp->registry_port;
|
|
597 Description: Accessor for registry port attribute
|
|
598 Returntype : string e.g. ensembldb.ensembl.org
|
|
599 Exceptions : None
|
|
600 Caller : general
|
|
601 Status : at risk
|
|
602
|
|
603 =cut
|
|
604
|
|
605 sub registry_port{
|
|
606 return $_[0]->{'reg_port'};
|
|
607 }
|
|
608
|
|
609 =head2 registry_pass
|
|
610
|
|
611 Example : my $reg_pass = $imp->registry_pass;
|
|
612 Description: Accessor for registry pass attribute
|
|
613 Returntype : string e.g. ensembldb.ensembl.org
|
|
614 Exceptions : None
|
|
615 Caller : general
|
|
616 Status : at risk
|
|
617
|
|
618 =cut
|
|
619
|
|
620 sub registry_pass{
|
|
621 return $_[0]->{'reg_pass'};
|
|
622 }
|
|
623
|
|
624
|
|
625 #init method kept separate from new due to differing madatory check and set up
|
|
626
|
|
627 =head2 init_array_import
|
|
628
|
|
629 Example : $self->init_import();
|
|
630 Description: Initialises import by creating working directories
|
|
631 and by storing the Experiemnt
|
|
632 Returntype : none
|
|
633 Exceptions : warns and throws depending on recover and Experiment status
|
|
634 Caller : general
|
|
635 Status : at risk - merge with register_array_design
|
|
636
|
|
637 =cut
|
|
638
|
|
639 sub init_array_import{
|
|
640
|
|
641 my ($self) = shift;
|
|
642
|
|
643 # we need to define which paramters we'll be storing
|
|
644 #use the logic names of the analyses as the field headers
|
|
645
|
|
646 #need to test for vendor here
|
|
647
|
|
648 #Sanger, NIMBLEGEN(no design_id issue, could get from the ndf, but we want it in the DesignNotes.txt)
|
|
649 #Then we can change the Array/Chip generation to soley use the DesignNotes.txt rather than SampleKey
|
|
650 #which is experiment specific
|
|
651 #or eFG format.
|
|
652
|
|
653 $self->create_output_dirs('caches', 'fastas');
|
|
654
|
|
655
|
|
656 }
|
|
657
|
|
658
|
|
659 =head2 init_experiment_import
|
|
660
|
|
661 Example : $self->init_import();
|
|
662 Description: Initialises import by creating working directories
|
|
663 and by storing the Experiemnt
|
|
664 Returntype : none
|
|
665 Exceptions : warns and throws depending on recover and Experiment status
|
|
666 Caller : general
|
|
667 Status : at risk - merge with register exeriment
|
|
668
|
|
669 =cut
|
|
670
|
|
671 sub init_experiment_import{
|
|
672 my ($self) = shift;
|
|
673
|
|
674 #Change this to take config mandatory params?
|
|
675 #No specific to exp import
|
|
676 #Name is used in set_config anyway
|
|
677 #Currently we only have array and experiment import, both of which should have names
|
|
678 #Make mandatory?
|
|
679
|
|
680 foreach my $tmp ("group", "data_dir") { #name now generically mandatory
|
|
681 throw("Mandatory arg $tmp not been defined") if (! defined $self->{$tmp});
|
|
682 }
|
|
683 #Should we separate path on group here too, so we can have a dev/test group?
|
|
684
|
|
685 #Create output dirs
|
|
686 #This should be moved to the Parser to avoid generating directories which are needed for different imports
|
|
687 $self->create_output_dirs('raw', 'norm', 'caches', 'fastas');
|
|
688 throw("No result_files defined.") if (! defined $self->result_files());
|
|
689
|
|
690 #Log input files
|
|
691 #if (@{$self->result_files()}) {
|
|
692 # $self->log("Found result files arguments:\n\t".join("\n\t", @{$self->result_files()}));
|
|
693 #}
|
|
694 #This is done in new
|
|
695
|
|
696 #check for cell||feature and warn if no met file supplied?
|
|
697
|
|
698 if ($self->norm_method) {
|
|
699 my $norm_anal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($self->norm_method);
|
|
700
|
|
701 #should we list the valid analyses?
|
|
702 throw($self->norm_method.' is not a valid analysis') if ! $norm_anal;
|
|
703 $self->norm_analysis($norm_anal);
|
|
704 } else {
|
|
705 $self->log('WARNING: No normalisation analysis specified');
|
|
706 }
|
|
707
|
|
708 #warn "Need to check env vars here or in Parser or just after set_config?";
|
|
709 #Need generic method for checking ENV vars in Helper
|
|
710 #check for ENV vars?
|
|
711 #R_LIBS
|
|
712 #R_PATH if ! farm
|
|
713 #R_FARM_PATH
|
|
714
|
|
715 $self->validate_group(); #import experimental_group
|
|
716
|
|
717 #Get experiment
|
|
718 my $exp_adaptor = $self->db->get_ExperimentAdaptor();
|
|
719 my $exp = $exp_adaptor->fetch_by_name($self->name()); #, $self->group());
|
|
720 $self->process_experiment_config if $self->can('process_experiment_config'); #Parsers::MAGE::process_experiment_config
|
|
721
|
|
722 #Moved MAGE support form here to MAGE.pm
|
|
723
|
|
724 #Recovery now set so deal with experiment
|
|
725 if ($self->recovery() && ($exp)) {
|
|
726 $self->log("Using previously stored Experiment:\t".$exp->name);
|
|
727 } elsif ((! $self->recovery()) && $exp) {
|
|
728 throw("Your experiment name is already registered in the database, please choose a different \"name\", this will require renaming you input directory, or specify -recover if you are working with a failed/partial import.");
|
|
729 #can we skip this and store, and then check in register experiment if it is already stored then throw if not recovery
|
|
730 } else { # (recover && exp) || (recover && ! exp)
|
|
731
|
|
732
|
|
733 $exp = Bio::EnsEMBL::Funcgen::Experiment->new(
|
|
734 -EXPERIMENTAL_GROUP => $self->{egroup},
|
|
735 -NAME => $self->name(),
|
|
736 -DATE => $self->experiment_date(),
|
|
737 -PRIMARY_DESIGN_TYPE => $self->design_type(),
|
|
738 -DESCRIPTION => $self->description(),
|
|
739 -ADAPTOR => $self->db->get_ExperimentAdaptor(),
|
|
740 );
|
|
741
|
|
742 ($exp) = @{$exp_adaptor->store($exp)};
|
|
743 }
|
|
744
|
|
745
|
|
746 $self->experiment($exp);
|
|
747
|
|
748 #remove and add specific report, this is catchig some Root stuff
|
|
749 #$self->log("Initiated efg import with following parameters:\n".Data::Dumper::Dumper(\$self));
|
|
750
|
|
751 return;
|
|
752 }
|
|
753
|
|
754
|
|
755 #Move this to new or init_experiment
|
|
756
|
|
757 =head2 validate_group
|
|
758
|
|
759 Example : $self->validate_group();
|
|
760 Description: Validates groups details
|
|
761 Returntype : none
|
|
762 Exceptions : throws if insufficient info defined to store new Group and is not already present
|
|
763 Caller : general
|
|
764 Status : Medium - check location and contact i.e. group name clash?
|
|
765
|
|
766 =cut
|
|
767
|
|
768 sub validate_group{
|
|
769 my ($self) = shift;
|
|
770
|
|
771 my $egroup = $self->db->get_ExperimentalGroupAdaptor->fetch_by_name($self->group);
|
|
772
|
|
773 if (! defined $egroup) {
|
|
774
|
|
775 throw("validate_group does not yet fully support ExperimentalGroups, please add manually");
|
|
776
|
|
777 #if ($self->location() && $self->contact()) {
|
|
778 # $self->db->import_group($self->group(), $self->location, $self->contact());
|
|
779 #} else {
|
|
780 # throw("Group ".$self->group()." does not exist, please specify a location and contact to register the group");
|
|
781 #}
|
|
782 }
|
|
783
|
|
784 $self->{egroup} = $egroup;
|
|
785
|
|
786 return;
|
|
787 }
|
|
788
|
|
789 =head2 create_output_dirs
|
|
790
|
|
791 Example : $self->create_output_dirs();
|
|
792 Description: Does what it says on the tin, creates dirs in
|
|
793 the root output dir foreach @dirnames, also set paths in self
|
|
794 Arg [1] : mandatory - list of dir names
|
|
795 Returntype : none
|
|
796 Exceptions : none
|
|
797 Caller : general
|
|
798 Status : Medium - add throw?
|
|
799
|
|
800 =cut
|
|
801
|
|
802 sub create_output_dirs{
|
|
803 my ($self, @dirnames) = @_;
|
|
804
|
|
805 #output dir created in control script
|
|
806 #avoids errors when logs generated first
|
|
807
|
|
808
|
|
809 foreach my $name (@dirnames) {
|
|
810
|
|
811 if ($name eq 'caches') {
|
|
812 $self->{"${name}_dir"} = $ENV{'EFG_DATA'}."/${name}/".$self->db->dbc->dbname() if(! defined $self->{"${name}_dir"});
|
|
813 } elsif ($name eq 'fastas') {
|
|
814 $self->{"${name}_dir"} = $ENV{'EFG_DATA'}."/${name}/" if(! defined $self->{"${name}_dir"});
|
|
815 } else {
|
|
816 $self->{"${name}_dir"} = $self->get_dir('output')."/${name}" if(! defined $self->{"${name}_dir"});
|
|
817 }
|
|
818
|
|
819 if (! (-d $self->get_dir($name) || (-l $self->get_dir($name)))) {
|
|
820 $self->log("Creating directory:\t".$self->get_dir($name));
|
|
821 #This did not throw with mkdir!!
|
|
822 mkpath $self->get_dir($name) || throw('Failed to create directory: '. $self->get_dir($name));
|
|
823 chmod 0744, $self->get_dir($name);
|
|
824 }
|
|
825 }
|
|
826
|
|
827 return;
|
|
828 }
|
|
829
|
|
830 =head2 vendor
|
|
831
|
|
832 Example : $imp->vendor("NimbleGen");
|
|
833 Description: Getter/Setter for array vendor
|
|
834 Arg [1] : optional - vendor name
|
|
835 Returntype : string
|
|
836 Exceptions : none
|
|
837 Caller : general
|
|
838 Status : Stable
|
|
839
|
|
840 =cut
|
|
841
|
|
842 sub vendor{
|
|
843 my ($self) = shift;
|
|
844
|
|
845 if (@_) {
|
|
846 $self->{'vendor'} = shift;
|
|
847 $self->{'vendor'} = uc($self->{'vendor'});
|
|
848 }
|
|
849
|
|
850 return $self->{'vendor'};
|
|
851 }
|
|
852
|
|
853
|
|
854 =head2 feature_type
|
|
855
|
|
856 Example : $imp->feature_type($ftype);
|
|
857 Description: Getter/Setter for Experiment FeatureType
|
|
858 Arg [1] : optional - Bio::EnsEMBL::Funcgen::FeatureType
|
|
859 Returntype : Bio::EnsEMBL::Funcgen::FeatureType
|
|
860 Exceptions : Throws if arg is not valid or stored
|
|
861 Caller : general
|
|
862 Status : at risk
|
|
863
|
|
864 =cut
|
|
865
|
|
866 sub feature_type{
|
|
867 my ($self) = shift;
|
|
868
|
|
869 if (@_) {
|
|
870 my $ftype = shift;
|
|
871
|
|
872 #do we need this as we're checking in new?
|
|
873 if (! ($ftype->isa('Bio::EnsEMBL::Funcgen::FeatureType') && $ftype->dbID())) {
|
|
874 throw("Must pass a valid stored Bio::EnsEMBL::Funcgen::FeatureType");
|
|
875 }
|
|
876
|
|
877 $self->{'feature_type'} = $ftype;
|
|
878 }
|
|
879
|
|
880 return $self->{'feature_type'};
|
|
881 }
|
|
882
|
|
883 =head2 feature_analysis
|
|
884
|
|
885 Example : $imp->feature_analysis($fanal);
|
|
886 Description: Getter/Setter for Analysis used for creating the imported Features
|
|
887 Arg [1] : optional - Bio::EnsEMBL::Analysis
|
|
888 Returntype : Bio::EnsEMBL::Analysis
|
|
889 Exceptions : Throws if arg is not valid or stored
|
|
890 Caller : general
|
|
891 Status : at risk
|
|
892
|
|
893 =cut
|
|
894
|
|
895 sub feature_analysis{
|
|
896 my ($self) = shift;
|
|
897
|
|
898 if (@_) {
|
|
899 my $fanal = shift;
|
|
900
|
|
901 #do we need this as we're checking in new?
|
|
902 if (! (ref ($fanal) && $fanal->isa('Bio::EnsEMBL::Analysis') && $fanal->dbID())) {
|
|
903 throw("Must pass a valid stored Bio::EnsEMBL::Analysis");
|
|
904 }
|
|
905
|
|
906 $self->{'feature_analysis'} = $fanal;
|
|
907 }
|
|
908
|
|
909 return $self->{'feature_analysis'};
|
|
910 }
|
|
911
|
|
912 =head2 norm_analysis
|
|
913
|
|
914 Example : $imp->norm_analysis($anal);
|
|
915 Description: Getter/Setter for the normalisation analysis
|
|
916 Arg [1] : optional - Bio::EnsEMBL::Analysis
|
|
917 Returntype : Bio::EnsEMBL::Analysis
|
|
918 Exceptions : Throws if arg is not valid or stored
|
|
919 Caller : general
|
|
920 Status : at risk
|
|
921
|
|
922 =cut
|
|
923
|
|
924 sub norm_analysis{
|
|
925 my ($self) = shift;
|
|
926
|
|
927 if (@_) {
|
|
928 my $anal = shift;
|
|
929
|
|
930 #do we need this as we're checking in new?
|
|
931 if (! (ref($anal) && $anal->isa('Bio::EnsEMBL::Analysis') && $anal->dbID())) {
|
|
932 throw("Must pass a valid stored Bio::EnsEMBL::Analysis");
|
|
933 }
|
|
934
|
|
935 $self->{'norm_analysis'} = $anal;
|
|
936 }
|
|
937
|
|
938 return $self->{'norm_analysis'};
|
|
939 }
|
|
940
|
|
941
|
|
942
|
|
943 =head2 cell_type
|
|
944
|
|
945 Example : $imp->cell_type($ctype);
|
|
946 Description: Getter/Setter for Experiment CellType
|
|
947 Arg [1] : optional - Bio::EnsEMBL::Funcgen::CellType
|
|
948 Returntype : Bio::EnsEMBL::Funcgen::CellType
|
|
949 Exceptions : Throws if arg is not valid or stored
|
|
950 Caller : general
|
|
951 Status : at risk
|
|
952
|
|
953 =cut
|
|
954
|
|
955 sub cell_type{
|
|
956 my ($self) = shift;
|
|
957
|
|
958 if (@_) {
|
|
959 my $ctype = shift;
|
|
960
|
|
961 #do we need this as we're checking in new?
|
|
962 if (! ($ctype->isa('Bio::EnsEMBL::Funcgen::CellType') && $ctype->dbID())) {
|
|
963 throw("Must pass a valid stored Bio::EnsEMBL::Funcgen::CellType");
|
|
964 }
|
|
965
|
|
966 $self->{'cell_type'} = $ctype;
|
|
967 }
|
|
968
|
|
969 return $self->{'cell_type'};
|
|
970 }
|
|
971
|
|
972
|
|
973 ##=head2 ucsc_coords
|
|
974 ##
|
|
975 ## Example : $start += 1 if $self->ucsc_coords;
|
|
976 ## Description: Getter for UCSC coordinate usage flag
|
|
977 ## Returntype : boolean
|
|
978 ## Exceptions : none
|
|
979 ## Caller : general
|
|
980 ## Status : at risk
|
|
981 ##
|
|
982 ##=cut
|
|
983 ##
|
|
984 ##sub ucsc_coords{
|
|
985 ## my $self = shift;
|
|
986 ## return $self->{'ucsc_coords'};
|
|
987 ##}
|
|
988
|
|
989
|
|
990
|
|
991 =head2 array_file
|
|
992
|
|
993 Example : my $array_file = $imp->array_file();
|
|
994 Description: Getter/Setter for sanger/design array file
|
|
995 Arg [1] : optional - path to adf or gff array definition/mapping file
|
|
996 Returntype : string
|
|
997 Exceptions : none
|
|
998 Caller : general
|
|
999 Status : at risk
|
|
1000
|
|
1001 =cut
|
|
1002
|
|
1003 sub array_file{
|
|
1004 my ($self) = shift;
|
|
1005 $self->{'array_file'} = shift if(@_);
|
|
1006 return $self->{'array_file'};
|
|
1007 }
|
|
1008
|
|
1009 =head2 array_name
|
|
1010
|
|
1011 Example : my $array_name = $imp->array_name();
|
|
1012 Description: Getter/Setter for array name
|
|
1013 Arg [1] : optional string - name of array
|
|
1014 Returntype : string
|
|
1015 Exceptions : none
|
|
1016 Caller : general
|
|
1017 Status : at risk
|
|
1018
|
|
1019 =cut
|
|
1020
|
|
1021 sub array_name{
|
|
1022 my ($self) = shift;
|
|
1023 $self->{'array_name'} = shift if(@_);
|
|
1024 return $self->{'array_name'};
|
|
1025 }
|
|
1026
|
|
1027
|
|
1028 =head2 array_set
|
|
1029
|
|
1030 Example : $imp->array_set(1);
|
|
1031 Description: Getter/Setter for array set flag
|
|
1032 Arg [1] : optional boolean - treat all array chips as the same array
|
|
1033 Returntype : boolean
|
|
1034 Exceptions : none
|
|
1035 Caller : general
|
|
1036 Status : at risk
|
|
1037
|
|
1038 =cut
|
|
1039
|
|
1040 sub array_set{
|
|
1041 my ($self) = shift;
|
|
1042 $self->{'array_set'} = shift if(@_);
|
|
1043 return $self->{'array_set'};
|
|
1044 }
|
|
1045
|
|
1046
|
|
1047 =head2 add_Array
|
|
1048
|
|
1049 Arg [1] : Bio::EnsEMBL::Funcgen::Array
|
|
1050 Example : $self->add_Array($array);
|
|
1051 Description: Setter for array elements
|
|
1052 Returntype : none
|
|
1053 Exceptions : throws if passed non Array or if more than one Array set
|
|
1054 Caller : Importer
|
|
1055 Status : At risk - Implement multiple arrays? Move to Experiment?
|
|
1056
|
|
1057 =cut
|
|
1058
|
|
1059 sub add_Array{
|
|
1060 my $self = shift;
|
|
1061
|
|
1062 #do we need to check if stored?
|
|
1063 if (! $_[0]->isa('Bio::EnsEMBL::Funcgen::Array')) {
|
|
1064 throw("Must supply a Bio::EnsEMBL::Funcgen::Array");
|
|
1065 } elsif (@_) {
|
|
1066 push @{$self->{'arrays'}}, @_;
|
|
1067 }
|
|
1068
|
|
1069 throw("Does not yet support multiple array imports") if(scalar (@{$self->{'arrays'}}) > 1);
|
|
1070 #need to alter read_probe data at the very least
|
|
1071
|
|
1072 return;
|
|
1073 }
|
|
1074
|
|
1075
|
|
1076
|
|
1077 =head2 arrays
|
|
1078
|
|
1079 Example : foreach my $array(@{$imp->arrays}){ ...do an array of things ...};
|
|
1080 Description: Getter for the arrays attribute
|
|
1081 Returntype : ARRAYREF
|
|
1082 Exceptions : none
|
|
1083 Caller : general
|
|
1084 Status : at risk
|
|
1085
|
|
1086 =cut
|
|
1087
|
|
1088 sub arrays{
|
|
1089 my $self = shift;
|
|
1090
|
|
1091 if (! defined $self->{'arrays'}) {
|
|
1092 $self->{'arrays'} = $self->db->get_ArrayAdaptor->fetch_all_by_Experiment($self->experiment());
|
|
1093 }
|
|
1094
|
|
1095 return $self->{'arrays'};
|
|
1096 }
|
|
1097
|
|
1098
|
|
1099
|
|
1100 =head2 location
|
|
1101
|
|
1102 Example : $imp->vendor("Hinxton");
|
|
1103 Description: Getter/Setter for group location
|
|
1104 Arg [1] : optional - location
|
|
1105 Returntype : string
|
|
1106 Exceptions : none
|
|
1107 Caller : general
|
|
1108 Status : Stable
|
|
1109
|
|
1110 =cut
|
|
1111
|
|
1112
|
|
1113 sub location{
|
|
1114 my ($self) = shift;
|
|
1115 $self->{'location'} = shift if(@_);
|
|
1116 return $self->{'location'};
|
|
1117 }
|
|
1118
|
|
1119
|
|
1120 =head2 contact
|
|
1121
|
|
1122 Example : my $contact = $imp->contact();
|
|
1123 Description: Getter/Setter for the group contact
|
|
1124 Arg [1] : optional - contact name/email/address
|
|
1125 Returntype : string
|
|
1126 Exceptions : none
|
|
1127 Caller : general
|
|
1128 Status : Stable
|
|
1129
|
|
1130 =cut
|
|
1131
|
|
1132 sub contact{
|
|
1133 my ($self) = shift;
|
|
1134 $self->{'contact'} = shift if(@_);
|
|
1135 return $self->{'contact'};
|
|
1136 }
|
|
1137
|
|
1138 =head2 name
|
|
1139
|
|
1140 Example : $imp->name('Experiment1');
|
|
1141 Description: Getter/Setter for the experiment name
|
|
1142 Arg [1] : optional - experiment name
|
|
1143 Returntype : string
|
|
1144 Exceptions : none
|
|
1145 Caller : general
|
|
1146 Status : Stable
|
|
1147
|
|
1148 =cut
|
|
1149
|
|
1150
|
|
1151 sub name{
|
|
1152 my ($self) = shift;
|
|
1153 $self->{'name'} = shift if(@_);
|
|
1154 return $self->{'name'};
|
|
1155 }
|
|
1156
|
|
1157 =head2 result_files
|
|
1158
|
|
1159 Example : $imp->result_files(\@files);
|
|
1160 Description: Getter/Setter for the result file paths
|
|
1161 Arg [1] : Listref of file paths
|
|
1162 Returntype : Listref
|
|
1163 Exceptions : none
|
|
1164 Caller : general
|
|
1165 Status : At risk
|
|
1166
|
|
1167 =cut
|
|
1168
|
|
1169
|
|
1170 sub result_files{
|
|
1171 my ($self) = shift;
|
|
1172 $self->{'result_files'} = shift if(@_);
|
|
1173 return $self->{'result_files'};
|
|
1174 }
|
|
1175
|
|
1176
|
|
1177
|
|
1178
|
|
1179
|
|
1180 =head2 experiment_date
|
|
1181
|
|
1182 Example : $imp->experiment_date('2006-11-02');
|
|
1183 Description: Getter/Setter for the experiment date
|
|
1184 Arg [1] : optional - date string in yyyy-mm-dd
|
|
1185 Returntype : string
|
|
1186 Exceptions : none
|
|
1187 Caller : general
|
|
1188 Status : At risk
|
|
1189
|
|
1190 =cut
|
|
1191
|
|
1192
|
|
1193
|
|
1194 sub experiment_date{
|
|
1195 my ($self) = shift;
|
|
1196
|
|
1197 if (@_) {
|
|
1198 my $date = shift;
|
|
1199
|
|
1200 if ($date !~ /[0-9]{4}-[0-9]{2}[0-9]{2}/o) {
|
|
1201 throw('Parameter -experiment_date needs to fe in the format: YYYY-MM-DD');
|
|
1202 }
|
|
1203
|
|
1204 $self->{'experiment_date'} = $date;
|
|
1205 } elsif ($self->vendor() eq "nimblegen" && ! defined $self->{'experiment_date'}) {
|
|
1206 $self->{'experiment_date'} = &get_date("date", $self->get_config("chip_file")),
|
|
1207 }
|
|
1208
|
|
1209 return $self->{'experiment_date'};
|
|
1210 }
|
|
1211
|
|
1212
|
|
1213
|
|
1214 =head2 group
|
|
1215
|
|
1216 Example : my $exp_group = $imp->group();
|
|
1217 Description: Getter/Setter for the group name
|
|
1218 Arg [1] : optional - group name
|
|
1219 Returntype : string
|
|
1220 Exceptions : none
|
|
1221 Caller : general
|
|
1222 Status : Stable
|
|
1223
|
|
1224 =cut
|
|
1225
|
|
1226 sub group{
|
|
1227 my ($self) = shift;
|
|
1228 $self->{'group'} = shift if(@_);
|
|
1229 return $self->{'group'};
|
|
1230 }
|
|
1231
|
|
1232
|
|
1233 =head2 description
|
|
1234
|
|
1235 Example : $imp->description("Human chrX H3 Lys 9 methlyation");
|
|
1236 Description: Getter/Setter for the experiment element
|
|
1237 Arg [1] : optional - experiment description
|
|
1238 Returntype : string
|
|
1239 Exceptions : none
|
|
1240 Caller : general
|
|
1241 Status : Stable
|
|
1242
|
|
1243 =cut
|
|
1244
|
|
1245 sub description{
|
|
1246 my $self = shift;
|
|
1247
|
|
1248 if (@_) {
|
|
1249 $self->{'description'} = shift;
|
|
1250 }
|
|
1251
|
|
1252 return $self->{'description'};
|
|
1253 }
|
|
1254
|
|
1255 ##=head2 feature_set_description
|
|
1256 ##
|
|
1257 ## Example : $imp->description("ExperimentalSet description");
|
|
1258 ## Description: Getter/Setter for the FeatureSet description for an
|
|
1259 ## InputSet import e.g. preprocessed GFF/Bed data
|
|
1260 ## Arg [1] : optional - string feature set description
|
|
1261 ## Returntype : string
|
|
1262 ## Exceptions : none
|
|
1263 ## Caller : general
|
|
1264 ## Status : At risk
|
|
1265 ##
|
|
1266 ##=cut
|
|
1267 ##
|
|
1268 ##sub feature_set_description{
|
|
1269 ## my $self = shift;
|
|
1270 ##
|
|
1271 ## $self->{'feature_set_description'} = shift if @_;
|
|
1272 ##
|
|
1273 ## return $self->{'feature_set_description'};
|
|
1274 ##}
|
|
1275
|
|
1276 =head2 format
|
|
1277
|
|
1278 Example : $imp->format("Tiled");
|
|
1279 Description: Getter/Setter for the array format
|
|
1280 Arg [1] : optional - array format
|
|
1281 Returntype : string
|
|
1282 Exceptions : none
|
|
1283 Caller : general
|
|
1284 Status : Stable
|
|
1285
|
|
1286 =cut
|
|
1287
|
|
1288 sub format{
|
|
1289 my ($self) = shift;
|
|
1290 $self->{'format'} = shift if(@_);
|
|
1291 return $self->{'format'};
|
|
1292 }
|
|
1293
|
|
1294 =head2 experiment
|
|
1295
|
|
1296 Example : my $exp = $imp->experiment();
|
|
1297 Description: Getter/Setter for the Experiment element
|
|
1298 Arg [1] : optional - Bio::EnsEMBL::Funcgen::Experiment
|
|
1299 Returntype : Bio::EnsEMBL::Funcgen::Experiment
|
|
1300 Exceptions : throws if arg is not an Experiment
|
|
1301 Caller : general
|
|
1302 Status : Stable
|
|
1303
|
|
1304 =cut
|
|
1305
|
|
1306 sub experiment{
|
|
1307 my ($self) = shift;
|
|
1308
|
|
1309 if (@_) {
|
|
1310
|
|
1311 if (! $_[0]->isa('Bio::EnsEMBL::Funcgen::Experiment')) {
|
|
1312 throw("Must pass a Bio::EnsEMBL::Funcgen::Experiment object");
|
|
1313 }
|
|
1314
|
|
1315 $self->{'experiment'} = shift;
|
|
1316 }
|
|
1317
|
|
1318 return $self->{'experiment'};
|
|
1319 }
|
|
1320
|
|
1321 ##=head2 db
|
|
1322 ##
|
|
1323 ## Example : $imp->db($funcgen_db);
|
|
1324 ## Description: Getter/Setter for the db element
|
|
1325 ## Arg [1] : optional - Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor
|
|
1326 ## Returntype : Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor
|
|
1327 ## Exceptions : throws if arg is not an DBAdaptor
|
|
1328 ## Caller : general
|
|
1329 ## Status : Stable
|
|
1330 ##
|
|
1331 ##=cut
|
|
1332 ##
|
|
1333 ##sub db{
|
|
1334 ## my $self = shift;
|
|
1335 ##
|
|
1336 ## if (defined $_[0] && $_[0]->isa("Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor")) {
|
|
1337 ## $self->{'db'} = shift;
|
|
1338 ## } elsif (defined $_[0]) {
|
|
1339 ## throw("Need to pass a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor");
|
|
1340 ## }
|
|
1341 ##
|
|
1342 ## return $self->{'db'};
|
|
1343 ##}
|
|
1344 ##
|
|
1345 ##=head2 pass
|
|
1346 ##
|
|
1347 ## Example : $imp->pass("password");
|
|
1348 ## Description: Getter/Setter for the db password
|
|
1349 ## Arg [1] : optional - db password
|
|
1350 ## Returntype : string
|
|
1351 ## Exceptions : none
|
|
1352 ## Caller : general
|
|
1353 ## Status : Stable
|
|
1354 ##
|
|
1355 ##=cut
|
|
1356 ##
|
|
1357 ##
|
|
1358 ##sub pass{
|
|
1359 ## my $self = shift;
|
|
1360 ## $self->{'pass'} = shift if(@_);
|
|
1361 ## return $self->{'pass'};
|
|
1362 ##}
|
|
1363 ##
|
|
1364 ##=head2 pass
|
|
1365 ##
|
|
1366 ## Example : $imp->host("hoastname");
|
|
1367 ## Description: Getter/Setter for the db hostname
|
|
1368 ## Arg [1] : optional - db hostname
|
|
1369 ## Returntype : string
|
|
1370 ## Exceptions : none
|
|
1371 ## Caller : general
|
|
1372 ## Status : Stable
|
|
1373 ##
|
|
1374 ##=cut
|
|
1375 ##
|
|
1376 ##sub host{
|
|
1377 ## my $self = shift;
|
|
1378 ## $self->{'host'} = shift if(@_);
|
|
1379 ## return $self->{'host'};
|
|
1380 ##}
|
|
1381 ##
|
|
1382 ##=head2 port
|
|
1383 ##
|
|
1384 ## Example : $imp->port(3306);
|
|
1385 ## Description: Getter/Setter for the db port number
|
|
1386 ## Arg [1] : optional - db port number
|
|
1387 ## Returntype : int
|
|
1388 ## Exceptions : none
|
|
1389 ## Caller : general
|
|
1390 ## Status : Stable
|
|
1391 ##
|
|
1392 ##=cut
|
|
1393 ##
|
|
1394 ##sub port{
|
|
1395 ## my $self = shift;
|
|
1396 ## $self->{'port'} = shift if(@_);
|
|
1397 ## return $self->{'port'};
|
|
1398 ##}
|
|
1399 ##
|
|
1400 ##=head2 user
|
|
1401 ##
|
|
1402 ## Example : $imp->user("user_name");
|
|
1403 ## Description: Getter/Setter for the db user name
|
|
1404 ## Arg [1] : optional - db user name
|
|
1405 ## Returntype : string
|
|
1406 ## Exceptions : none
|
|
1407 ## Caller : general
|
|
1408 ## Status : Stable
|
|
1409 ##
|
|
1410 ##=cut
|
|
1411 ##
|
|
1412 ##sub user{
|
|
1413 ## my $self = shift;
|
|
1414 ## $self->{'user'} = shift if(@_);
|
|
1415 ## return $self->{'user'};
|
|
1416 ##}
|
|
1417
|
|
1418 ##=head2 dump_fasta
|
|
1419 ##
|
|
1420 ## Example : if($self->dump_fasta()){...do fasta dump...}
|
|
1421 ## Description: Getter/Setter for the dump_fasta flag
|
|
1422 ## Arg [1] : optional - 0 or 1
|
|
1423 ## Returntype : boolean
|
|
1424 ## Exceptions : none
|
|
1425 ## Caller : self
|
|
1426 ## Status : Stable
|
|
1427 ##
|
|
1428 ##=cut
|
|
1429 ##
|
|
1430 ##
|
|
1431 ##sub dump_fasta{
|
|
1432 ## my $self = shift;
|
|
1433 ## $self->{'_dump_fasta'} = shift if @_;
|
|
1434 ## return $self->{'_dump_fasta'};
|
|
1435 ##}
|
|
1436 ##
|
|
1437
|
|
1438
|
|
1439 ##=head2 species
|
|
1440 ##
|
|
1441 ## Example : $imp->species("homo_sapiens");
|
|
1442 ## Description: Getter/Setter for species
|
|
1443 ## Arg [1] : optional - species name(alias?)
|
|
1444 ## Returntype : string
|
|
1445 ## Exceptions : none ? throw if no alias found?
|
|
1446 ## Caller : general
|
|
1447 ## Status : Medium - may move reg alias look up to this method
|
|
1448 ##
|
|
1449 ##=cut
|
|
1450 ##
|
|
1451 ##sub species{
|
|
1452 ## my $self = shift;
|
|
1453 ##
|
|
1454 ## #should we do reg alias look up here?
|
|
1455 ## #Will we ever want to redefine species?
|
|
1456 ## #Change to just getter?
|
|
1457 ##
|
|
1458 ## $self->{'species'} = shift if(@_);
|
|
1459 ##
|
|
1460 ## return $self->{'species'};
|
|
1461 ##}
|
|
1462
|
|
1463 =head2 get_dir
|
|
1464
|
|
1465 Example : $imp->get_dir("import");
|
|
1466 Description: Retrieves full path for given directory
|
|
1467 Arg [1] : mandatory - dir name
|
|
1468 Returntype : string
|
|
1469 Exceptions : none
|
|
1470 Caller : general
|
|
1471 Status : at risk - move to Helper?
|
|
1472
|
|
1473 =cut
|
|
1474
|
|
1475 sub get_dir{
|
|
1476 my ($self, $dirname) = @_;
|
|
1477 return $self->get_data("${dirname}_dir");
|
|
1478 }
|
|
1479
|
|
1480 =head2 norm_method
|
|
1481
|
|
1482 Example : my $norm_method = $imp->norm_method()
|
|
1483 Description: Getter/Setter for normalisation method
|
|
1484 Arg [1] : mandatory - method name
|
|
1485 Returntype : string
|
|
1486 Exceptions : none ? throw if no analysis with logic name
|
|
1487 Caller : general
|
|
1488 Status : At risk - restrict to logic_name and validate against DB, allow multiple
|
|
1489
|
|
1490 =cut
|
|
1491
|
|
1492 #Move to Nimblegen?
|
|
1493 #Do we ever want to normalise other data?
|
|
1494
|
|
1495 sub norm_method{
|
|
1496 my $self = shift;
|
|
1497
|
|
1498 if (@_) {
|
|
1499 $self->{'norm_method'} = shift;
|
|
1500 } elsif (! defined $self->{'norm_method'}) {
|
|
1501 $self->{'norm_method'}= $self->get_config('norm_method');
|
|
1502 }
|
|
1503
|
|
1504 return $self->{'norm_method'};
|
|
1505 }
|
|
1506
|
|
1507
|
|
1508 =head2 get_config
|
|
1509
|
|
1510 Arg [1] : mandatory - name of the data element to retrieve from the config hash
|
|
1511 Example : %dye_freqs = %{$imp->get_config('dye_freqs')};
|
|
1512 Description: returns data from the definitions hash
|
|
1513 Returntype : various
|
|
1514 Exceptions : none
|
|
1515 Caller : Importer
|
|
1516 Status : at risk - replace with direct calls in the inherited Defs class?
|
|
1517
|
|
1518 =cut
|
|
1519
|
|
1520
|
|
1521 sub get_config{
|
|
1522 my ($self, $data_name) = @_;
|
|
1523 return $self->get_data('config', $data_name); #will this cause undefs?
|
|
1524 }
|
|
1525
|
|
1526
|
|
1527
|
|
1528
|
|
1529
|
|
1530 =head2 register_experiment
|
|
1531
|
|
1532 Example : $imp->register_experiment()
|
|
1533 Description: General control method, performs all data import and normalisations
|
|
1534 Arg [1] : optional - dnadb DBAdaptor
|
|
1535 Returntype : none
|
|
1536 Exceptions : throws if arg is not Bio::EnsEMBL::DBSQL::DBAdaptor
|
|
1537 Caller : general
|
|
1538 Status : Medium
|
|
1539
|
|
1540 =cut
|
|
1541
|
|
1542
|
|
1543 #Need to split this method?
|
|
1544 #Pre farm process
|
|
1545 #Define and store all sets,
|
|
1546 #pre process input file once if required
|
|
1547 #How are we going to be able to tell wether this has been done successfully?
|
|
1548 #runner will catch error, therefore safe to assume that file is complete if farm job
|
|
1549 #unless we are not running with farm
|
|
1550
|
|
1551 #farm specific processes
|
|
1552 #actually parse and store data
|
|
1553
|
|
1554 #Can we separate these for different import types?
|
|
1555
|
|
1556 sub register_experiment{
|
|
1557 my ($self) = shift;
|
|
1558
|
|
1559 #Need to check for dnadb passed with adaptor to contructor
|
|
1560 if (@_) {
|
|
1561
|
|
1562 if ( ! $_[0]->isa("Bio::EnsEMBL::DBSQL::DBAdaptor")) {
|
|
1563 throw("You need to pass a valid dnadb adaptor to register the experiment");
|
|
1564 }
|
|
1565 $self->db->dnadb($_[0]);
|
|
1566 } elsif ( ! $self->db) {
|
|
1567 throw("You need to pass/set a DBAdaptor with a DNADB attached of the relevant data version");
|
|
1568 }
|
|
1569
|
|
1570 #This could still be the default core db for the current version
|
|
1571 #warn here if not passed DB?
|
|
1572 #These should be vendor independent, only the read methods should need specific order?
|
|
1573
|
|
1574 $self->init_experiment_import();
|
|
1575 #can we just have init here instead?
|
|
1576
|
|
1577
|
|
1578 #This could do with a rewrite to move some things to the parsers
|
|
1579 #$self::SUPER->register_experiment
|
|
1580
|
|
1581 $self->write_validate_experiment_config if $self->can('write_validate_experiment_config');
|
|
1582
|
|
1583 #This is too array specific!
|
|
1584 #Can we have an array of import_methods in the config?
|
|
1585 #foreach my $method(@{$self->get_config{'import_methods'}}){
|
|
1586 #$self->method;
|
|
1587 #}
|
|
1588 #We're already doing this in read_data for each of these data_types
|
|
1589
|
|
1590 #Need to be able to run this separately, so we can normalise previously imported sets with different methods
|
|
1591 #should be able t do this without raw data files e.g. retrieve info from DB
|
|
1592 #Is this implemented?
|
|
1593
|
|
1594 $self->read_data("probe");
|
|
1595 $self->read_data("results");
|
|
1596
|
|
1597
|
|
1598 my $norm_method = $self->norm_method();
|
|
1599
|
|
1600 if (defined $norm_method) {
|
|
1601 warn "norm method is $norm_method";
|
|
1602
|
|
1603 $self->R_norm($norm_method);
|
|
1604 #change this to $self->$norm_method
|
|
1605 #so we can have non-R_norm normalisation
|
|
1606 }
|
|
1607
|
|
1608
|
|
1609 return;
|
|
1610 }
|
|
1611
|
|
1612 #Move array specific ones to Nimblegen.pm?
|
|
1613 #Also used by ArrayDesign and Sanger.pm
|
|
1614 #So need to create Array.pm baseclass, which is a Helper.
|
|
1615
|
|
1616 =head2 store_set_probes_features
|
|
1617
|
|
1618 Arg [1] : mandatory - array chip id
|
|
1619 Arg [2] : optional - Bio::EnsEMBL::Funcgen::ProbeSet
|
|
1620 Arg [3] : mandatory - hashref of keys probe id, values are
|
|
1621 hash of probe/features with values
|
|
1622 Bio::EnsEMBL::Funcgen::Probe/Features for a given
|
|
1623 probe set if defined.
|
|
1624 Example : $self->store_set_probes_features($ac->dbID(), $ops, \%pfs);
|
|
1625 Description: Stores probe set, probes and probe features
|
|
1626 Returntype : none
|
|
1627 Exceptions : none
|
|
1628 Caller : self
|
|
1629 Status : Medium
|
|
1630
|
|
1631 =cut
|
|
1632
|
|
1633 sub store_set_probes_features{
|
|
1634 my ($self, $ac_id, $pf_hash, $ops) = @_;
|
|
1635
|
|
1636 ### Deal with ProbeSets
|
|
1637 if ($ops) {
|
|
1638 $ops->size(scalar(keys %$pf_hash));
|
|
1639 ($ops) = $self->db->get_ProbeSetAdaptor->store($ops);
|
|
1640 }
|
|
1641
|
|
1642 #If we're going to validate fully, we need to check for probes in this probeset on this array chip
|
|
1643 #Update size if we have any new probes
|
|
1644 #Overkill? Only do on recover? Do not read if array chip is IMPORTED
|
|
1645 #This does not make any attempt to validate probes/set vs previously stored data
|
|
1646
|
|
1647 for my $probe_id (keys %$pf_hash) {
|
|
1648
|
|
1649 #set probeset in probe and store
|
|
1650 #the process corresponding feature
|
|
1651 my $probe = $pf_hash->{$probe_id}->{'probe'};
|
|
1652 $probe->probeset($ops) if $ops;
|
|
1653 ($probe) = @{$self->db->get_ProbeAdaptor->store($probe)};
|
|
1654
|
|
1655 #Can't use get_all_Arrays here as we can't guarantee this will only ever be the array we've generated
|
|
1656 #Might dynamically load array if non-present
|
|
1657 #This is allowing multiple dbIDs per probe??? Is this wrong?
|
|
1658 #$self->cache_probe_info($probe->get_probename(), $probe->dbID());###########Remove as we're now importing all then resolving
|
|
1659
|
|
1660
|
|
1661 foreach my $feature (@{$pf_hash->{$probe_id}->{'features'}}) {
|
|
1662 $feature->probe($probe);
|
|
1663 ($feature) = @{$self->db->get_ProbeFeatureAdaptor->store($feature)};
|
|
1664 }
|
|
1665 }
|
|
1666
|
|
1667 undef $ops; #Will this persist in the caller?
|
|
1668 undef %{$pf_hash};
|
|
1669
|
|
1670 return;
|
|
1671 }
|
|
1672
|
|
1673
|
|
1674 =head2 cache_slice
|
|
1675
|
|
1676 Arg [0] : string - region_name e.g. X
|
|
1677 Arg [1] : optional - coordinate system name e.g. supercontig, defaults to chromosome
|
|
1678 Example : my $slice = $self->cache_slice(12);
|
|
1679 Description: Caches or retrieves from cache a given slice
|
|
1680 Returntype : Bio::EnsEMBL::Slice
|
|
1681 Exceptions : throws f no region name specified
|
|
1682 Caller : self
|
|
1683 Status : At risk
|
|
1684
|
|
1685 =cut
|
|
1686
|
|
1687 sub cache_slice{
|
|
1688 my ($self, $region_name, $cs_name, $total_count) = @_;
|
|
1689
|
|
1690 throw("Need to define a region_name to cache a slice from") if ! $region_name;
|
|
1691 $self->{'slice_cache'} ||= {};
|
|
1692 $region_name =~ s/chr//;
|
|
1693 $region_name = "MT" if $region_name eq "M";
|
|
1694
|
|
1695 if (! exists ${$self->{'seen_slice_cache'}}{$region_name}) {
|
|
1696 my $slice = $self->slice_adaptor->fetch_by_region($cs_name, $region_name);
|
|
1697
|
|
1698 #Set seen cache so we don't try this again
|
|
1699 $self->{seen_slice_cache}{$region_name} = $slice;
|
|
1700
|
|
1701 if (! $slice) {
|
|
1702 warn("-- Could not generate a slice for ${cs_name}:$region_name\n");
|
|
1703 } else {
|
|
1704
|
|
1705 my $sr_name = $slice->seq_region_name; #In case we passed a slice name
|
|
1706
|
|
1707 if (@{$self->{seq_region_names}}) {
|
|
1708 return if ! grep(/^${sr_name}$/, @{$self->{seq_region_names}}); #not on required slice
|
|
1709 }
|
|
1710 }
|
|
1711
|
|
1712 $self->{'slice_cache'}->{$region_name} = $slice;
|
|
1713 }
|
|
1714
|
|
1715 if ($total_count && exists ${$self->{'seen_slice_cache'}}{$region_name}) {
|
|
1716 #This is an InputSet specific method
|
|
1717 $self->count('total_features') if $self->can('count');
|
|
1718 }
|
|
1719
|
|
1720 #Only return if exists to avoid creating hash key
|
|
1721 return (exists $self->{'slice_cache'}->{$region_name}) ? $self->{'slice_cache'}->{$region_name} : undef;
|
|
1722 }
|
|
1723
|
|
1724 =head2 slice_cache
|
|
1725
|
|
1726 Example : my @seen_slices = values(%{$self->slice_cache});;
|
|
1727 Description: Returns the slice cache i.e. all the Slices seen in the data filtered
|
|
1728 by the defined slices. This method can be used to run only the appropriate
|
|
1729 slice jobs after a prepare stage.
|
|
1730 Returntype : Hashref of seq_region name Bio::EnsEMBL::Slice pairs
|
|
1731 Exceptions : None
|
|
1732 Caller : self
|
|
1733 Status : At risk
|
|
1734
|
|
1735 =cut
|
|
1736
|
|
1737
|
|
1738 sub slice_cache{
|
|
1739 my $self = shift;
|
|
1740
|
|
1741 return $self->{'slice_cache'};
|
|
1742 }
|
|
1743
|
|
1744
|
|
1745
|
|
1746
|
|
1747 =head2 cache_probe_info
|
|
1748
|
|
1749 Arg [0] : mandatory - probe name
|
|
1750 Arg [1] : mandatory - probe dbID
|
|
1751 Arg [2] : optioanl int - x coord of probe on array
|
|
1752 Arg [3] : optional int - y coord of probe on array
|
|
1753 Example : $self->cache_probe_info("Probe1", $probe->dbID());
|
|
1754 Or for result files which do not have X & Y, we need to cache
|
|
1755 X & Y from the design files: $self->cache_probe_info('Probe2', $probe->dbID(), $x, $y);
|
|
1756 Description: Setter for probe cache values
|
|
1757 Returntype : none
|
|
1758 Exceptions : throws is cache conflict encountered
|
|
1759 Caller : self
|
|
1760 Status : At risk - merge with following?
|
|
1761
|
|
1762 =cut
|
|
1763
|
|
1764 sub cache_probe_info{
|
|
1765 my ($self, $pname, $pid, $x, $y) = @_;
|
|
1766
|
|
1767 throw('Deprecated, too memory expensive, now resolving DB duplicates and using Tied File cache');
|
|
1768 throw("Must provide a probe name and id") if (! defined $pname || ! defined $pid);
|
|
1769
|
|
1770
|
|
1771 #do we need to loop through the file here?
|
|
1772 #if we make sure we're testing for a previous dbID before storing probes then we don't need to do this
|
|
1773 #we can catch the error when we get the probe id as we can check for >1 id for the same probe name
|
|
1774 #if (defined $self->{'_probe_cache'}->{$pname} && ($self->{'_probe_cache'}->{$pname}->[0] != $pid)) {
|
|
1775 # throw("Found two differing dbIDs for $pname, need to sort out redundant probe entries");
|
|
1776 #}
|
|
1777
|
|
1778 $self->{'_probe_cache'}->{$pname} = (defined $x && defined $y) ? [$pid, $x, $y] : [$pid];
|
|
1779
|
|
1780 return;
|
|
1781 }
|
|
1782
|
|
1783
|
|
1784 =head2 get_probe_id_by_name_Array
|
|
1785
|
|
1786 Arg [1] : mandatory - probe name
|
|
1787 Example : $pid = $self->get_probe_id_by_name($pname);
|
|
1788 Description: Getter for probe cache values
|
|
1789 Returntype : int
|
|
1790 Exceptions : none
|
|
1791 Caller : self
|
|
1792 Status : At risk - merge with previous, move to importer?
|
|
1793
|
|
1794 =cut
|
|
1795
|
|
1796 sub get_probe_id_by_name_Array{
|
|
1797 my ($self, $name, $array) = @_;
|
|
1798
|
|
1799 #this is only ever called for fully imported ArrayChips, as will be deleted if recovering
|
|
1800 $self->resolve_probe_data() if(! exists $self->{'_probe_cache'}{$array->name()});
|
|
1801
|
|
1802 #we want to cycle through the given cache starting from the last position or 0.
|
|
1803 #we don't want to have to test for the size of the cache each time as this will be quite expensive
|
|
1804 #so we should store sizes separately along with current position
|
|
1805
|
|
1806
|
|
1807 my ($pid, $line);
|
|
1808
|
|
1809 #check current line
|
|
1810 if ($line = $self->{'_probe_cache'}{$array->name()}{'current_line'}) {
|
|
1811 if ($line =~ /^\Q${name}\E\t/) {
|
|
1812 $pid = (split/\t/o, $line)[1];
|
|
1813 }
|
|
1814 }
|
|
1815
|
|
1816
|
|
1817 if (! $pid) {
|
|
1818 while ($line = $self->{'_probe_cache'}{$array->name()}{'handle'}->getline()) {
|
|
1819
|
|
1820 if ($line =~ /^\Q${name}\E\t/) {
|
|
1821 $pid = (split/\t/o, $line)[1];
|
|
1822 $self->{'_probe_cache'}{$array->name()}{'current_line'} = $line;
|
|
1823 last;
|
|
1824 }
|
|
1825 }
|
|
1826 }
|
|
1827
|
|
1828 #do not remove this
|
|
1829 if (! $pid) {
|
|
1830 throw("Did not find probe name ($name) in cache, cache may need rebuilding, results may need sorting, or do you have an anomolaous probe?")
|
|
1831 } else {
|
|
1832 chomp $pid;
|
|
1833 }
|
|
1834
|
|
1835 return $pid;
|
|
1836 }
|
|
1837
|
|
1838 =head2 get_probe_cache_by_Array
|
|
1839
|
|
1840 Arg[1] : Bio::EnsEMBL::Funcgen::Array
|
|
1841 Arg[2] : boolean - from db flag, only to be used by Importer->resolve_probe_data !
|
|
1842 Example : $self->get_probe_cache_by_Array();
|
|
1843 Description: Gets the probe info cache which is an array tied to a file
|
|
1844 Returntype : Boolean - True if cache has been generated and set successfully
|
|
1845 Exceptions : none
|
|
1846 Caller : general
|
|
1847 Status : At risk
|
|
1848
|
|
1849 =cut
|
|
1850
|
|
1851 #from db flag should only be used by importer
|
|
1852 #this is because there is no guarantee that it will be resolved unless
|
|
1853 #called by resolve_probe_data
|
|
1854 #which then renames the file and resets the handle
|
|
1855 #can we clean this up and protect/hide this functionality?
|
|
1856 #can we check the cache file name in the get methods and throw if it contains unresolved?
|
|
1857 #make this private?
|
|
1858
|
|
1859 sub get_probe_cache_by_Array{
|
|
1860 my ($self, $array, $from_db) = @_;
|
|
1861
|
|
1862 my $msg = "Getting probe cache for ".$array->name();
|
|
1863 $msg .= " from DB" if $from_db;
|
|
1864 $self->log($msg); #, 1);
|
|
1865
|
|
1866 if (! ($array && $array->isa('Bio::EnsEMBL::Funcgen::Array') && $array->dbID())) {
|
|
1867 throw('Must provide a valid stored Bio::EnsEMBL::Funcgen::Array object');
|
|
1868 }
|
|
1869
|
|
1870 my $set = 0;
|
|
1871 my $cache_file = $self->get_dir('caches').'/'.$array->name().'.probe_cache';
|
|
1872
|
|
1873 ### Generate and resolve fresh cache from DB
|
|
1874 if ($from_db) {
|
|
1875
|
|
1876 $cache_file .= '.unresolved'; #This will be renamed by the caller if it is resolved
|
|
1877
|
|
1878 if (exists $self->{'_probe_cache'}{$array->name()}) {
|
|
1879 $self->log('Rebuilding probe_cache from DB for '.$array->name(), 1);
|
|
1880
|
|
1881
|
|
1882 #untie @{$self->{'_probe_cache'}{$array->name()}{'entries'}};
|
|
1883 #close($self->{'_probe_cache'}{$array->name()}{'handle'});#do we need to do this?
|
|
1884 delete $self->{'_probe_cache'}{$array->name()}; #implicitly closes
|
|
1885 $self->log('Deleted old cache', 1);
|
|
1886 } else {
|
|
1887 $self->log('Building probe_cache from DB for '.$array->name(), 1);
|
|
1888 }
|
|
1889
|
|
1890 #Move this to ProbeAdaptor?
|
|
1891 #This is where we'd set the unique key for a vendor and resolves duplicates based on the key
|
|
1892 my $cmd = 'SELECT name, probe_id from probe WHERE array_chip_id IN ('.join(',', @{$array->get_array_chip_ids()}).') ORDER by name, probe_id';
|
|
1893 $cmd = 'mysql '.$self->db->connect_string()." -e \"$cmd\" >".$cache_file;
|
|
1894 run_system_cmd($cmd);
|
|
1895
|
|
1896 }
|
|
1897
|
|
1898 ### Set cache
|
|
1899 if (-f $cache_file) {
|
|
1900 $self->log('MD5 check here?',1);
|
|
1901 $self->{'_probe_cache'}{$array->name()}{'current_line'} = undef;
|
|
1902 $self->{'_probe_cache'}{$array->name()}{'handle'} = open_file($cache_file);
|
|
1903
|
|
1904 #can we do a select count instead? and do this instead of the MD5?
|
|
1905 #$cmd = "wc -l $cache_file";
|
|
1906 #my $size = `$cmd`;
|
|
1907
|
|
1908 $set = 1;
|
|
1909 } else {
|
|
1910 warn 'Failed to get probe cache for array:'.$array->name();
|
|
1911 }
|
|
1912
|
|
1913 return $set;
|
|
1914 }
|
|
1915
|
|
1916
|
|
1917 #should reorganise these emthods to split reading the array data, and the actual data
|
|
1918 #currently:
|
|
1919 #meta reads array and chip data
|
|
1920 #probe reads probe_set, probes, which should definitely be in array, probe_feature? and results
|
|
1921 #native data format may not map to these methods directly, so may need to call previous method if required data not defined
|
|
1922
|
|
1923
|
|
1924 =head2 read_data
|
|
1925
|
|
1926 Example : $self->read_data("probe")
|
|
1927 Description: Calls each method in data_type array from config hash
|
|
1928 Arg [1] : mandatory - data type
|
|
1929 Returntype : none
|
|
1930 Exceptions : none
|
|
1931 Caller : self
|
|
1932 Status : At risk
|
|
1933
|
|
1934 =cut
|
|
1935
|
|
1936 sub read_data{
|
|
1937 my($self, $data_type) = @_;
|
|
1938
|
|
1939 map {my $method = "read_${_}_data"; $self->$method()} @{$self->get_config("${data_type}_data")};
|
|
1940 return;
|
|
1941 }
|
|
1942
|
|
1943
|
|
1944 =head2 design_type
|
|
1945
|
|
1946 Example : $self->design_type("binding_site_identification")
|
|
1947 Description: Getter/Setter for experimental design type
|
|
1948 Arg [1] : optional - design type
|
|
1949 Returntype : string
|
|
1950 Exceptions : none
|
|
1951 Caller : general
|
|
1952 Status : At risk
|
|
1953
|
|
1954 =cut
|
|
1955
|
|
1956 sub design_type{
|
|
1957 my $self = shift;
|
|
1958 return $self->{'design_type'};
|
|
1959 }
|
|
1960
|
|
1961
|
|
1962 =head2 get_chr_seq_region_id
|
|
1963
|
|
1964 Example : $seq_region_id = $self->get_seq_region_id('X');
|
|
1965 Description: Calls each method in data_type array from config hash
|
|
1966 Arg [1] : mandatory - chromosome name
|
|
1967 Arg [2] : optional - start value
|
|
1968 Arg [3] : optional - end value
|
|
1969 Returntype : int
|
|
1970 Exceptions : none
|
|
1971 Caller : self
|
|
1972 Status : At risk
|
|
1973
|
|
1974 =cut
|
|
1975
|
|
1976 #convinience wrapper method
|
|
1977 #could we use the seq region cache instead?
|
|
1978 #this seems like a lot of overhead for getting the id
|
|
1979 sub get_chr_seq_region_id{
|
|
1980 my ($self, $chr, $start, $end) = @_;
|
|
1981 #what about strand info?
|
|
1982
|
|
1983 #do we need the start and stop?
|
|
1984
|
|
1985 #use start and stop to prevent problems with scaffodl assemblies, i.e. >1 seq_region_id
|
|
1986 #my $slice = $self->slice_adaptor->fetch_by_region("chromosome", $chr, $start, $end);
|
|
1987 #we could pass the slice back to the slice adaptor for this, to avoid dbid problems betwen DBs
|
|
1988
|
|
1989 ###would need to implement other cs's here
|
|
1990
|
|
1991 return $self->slice_adaptor->fetch_by_region("chromosome", $chr, $start, $end)->get_seq_region_id();
|
|
1992 }
|
|
1993
|
|
1994
|
|
1995
|
|
1996 =head2 vsn_norm
|
|
1997
|
|
1998 Example : $self->vsn_norm();
|
|
1999 Description: Convinience/Wrapper method for vsn R normalisation
|
|
2000 Returntype : none
|
|
2001 Exceptions : none
|
|
2002 Caller : general
|
|
2003 Status : At risk
|
|
2004
|
|
2005 =cut
|
|
2006
|
|
2007 #Have Norm class or contain methods in importer?
|
|
2008 #Need to have analysis set up script for all standard analyses.
|
|
2009
|
|
2010 sub vsn_norm{
|
|
2011 my $self = shift;
|
|
2012 return $self->R_norm("VSN_GLOG");
|
|
2013 }
|
|
2014
|
|
2015
|
|
2016 =head2 farm
|
|
2017
|
|
2018 Arg [1] : Boolean
|
|
2019 Example : $importer->farm(1);
|
|
2020 Description: Flag to turn farm submission on
|
|
2021 Returntype : Boolean
|
|
2022 Exceptions : Throws is argument not a boolean
|
|
2023 Caller : general
|
|
2024 Status : At risk
|
|
2025
|
|
2026 =cut
|
|
2027
|
|
2028
|
|
2029 sub farm{
|
|
2030 my ($self, $farm) = @_;
|
|
2031
|
|
2032 $self->{'farm'} ||= undef; #define farm
|
|
2033
|
|
2034 if (defined $farm) {
|
|
2035 throw("Argument to farm must be a boolean 1 or 0") if(! ($farm == 1 || $farm == 0));
|
|
2036 $self->{'farm'} = $farm;
|
|
2037 }
|
|
2038
|
|
2039 return $self->{'farm'};
|
|
2040
|
|
2041 }
|
|
2042
|
|
2043 =head2 batch_job
|
|
2044
|
|
2045 Arg [1] : Boolean
|
|
2046 Example : $importer->batch_job(1);
|
|
2047 Description: Flag to turn on batch_job status
|
|
2048 Returntype : Boolean
|
|
2049 Exceptions : Throws is argument not a boolean
|
|
2050 Caller : general
|
|
2051 Status : At risk
|
|
2052
|
|
2053 =cut
|
|
2054
|
|
2055
|
|
2056 sub batch_job{
|
|
2057 my ($self, $batch_job) = @_;
|
|
2058
|
|
2059 #$self->{'batch_job'} ||= undef;
|
|
2060
|
|
2061 if (defined $batch_job) {
|
|
2062 throw("Argument to batch_job must be a boolean 1 or 0") if(! ($batch_job == 1 || $batch_job == 0));
|
|
2063 $self->{'batch_job'} = $batch_job;
|
|
2064 }
|
|
2065
|
|
2066 return $self->{'batch_job'};
|
|
2067
|
|
2068 }
|
|
2069
|
|
2070 =head2 prepared
|
|
2071
|
|
2072 Arg [1] : Boolean
|
|
2073 Example : $importer->prepared(1);
|
|
2074 Description: Flag to turn on prepared file status
|
|
2075 This signifies that the files have been previously imported
|
|
2076 using prepare mode and may not match the InputSubset names
|
|
2077 Returntype : Boolean
|
|
2078 Exceptions : None
|
|
2079 Caller : general
|
|
2080 Status : At risk
|
|
2081
|
|
2082 =cut
|
|
2083
|
|
2084
|
|
2085 sub prepared{
|
|
2086 my ($self, $prepared) = @_;
|
|
2087 $self->{'prepared'} = $prepared if (defined $prepared);
|
|
2088 return $self->{'prepared'};
|
|
2089 }
|
|
2090
|
|
2091
|
|
2092 =head2 R_norm
|
|
2093
|
|
2094 Example : $self->R_norm(@logic_names);
|
|
2095 Description: Performs R normalisations for given logic names
|
|
2096 Returntype : none
|
|
2097 Exceptions : Throws if R exits with error code or if data not not valid for analysis
|
|
2098 Caller : general
|
|
2099 Status : At risk
|
|
2100
|
|
2101 =cut
|
|
2102
|
|
2103 sub R_norm{
|
|
2104 my ($self, @logic_names) = @_;
|
|
2105 #This currently normalises a single two colour chip at a time
|
|
2106 #rather than normalising across a set of chip
|
|
2107 #also does in sets of analyses
|
|
2108 #good for keeping data separate, but not efficient in terms of querying
|
|
2109 #convert to use one script which only queries for each echip once, then does each anal
|
|
2110
|
|
2111
|
|
2112 my $aa = $self->db->get_AnalysisAdaptor();
|
|
2113 my $rset_adaptor = $self->db->get_ResultSetAdaptor();
|
|
2114 my $ra_id = $aa->fetch_by_logic_name("RawValue")->dbID();
|
|
2115 my %r_config = (
|
|
2116 "VSN_GLOG" => {( libs => ['vsn'],
|
|
2117 #norm_method => 'vsn',
|
|
2118 )},
|
|
2119
|
|
2120 "T.Biweight" => {(
|
|
2121 libs => ['affy'],
|
|
2122 #norm_method => 'tukey.biweight',
|
|
2123 )},
|
|
2124 );
|
|
2125
|
|
2126
|
|
2127 foreach my $logic_name (@logic_names) {
|
|
2128 #throw("Not yet implemented TukeyBiweight") if $logic_name eq "Tukey_Biweight";
|
|
2129
|
|
2130 #this has already been chcecked and set as the norm_analysis
|
|
2131 #need to resolve this multi method approach
|
|
2132 my $norm_anal = $aa->fetch_by_logic_name($logic_name);
|
|
2133
|
|
2134
|
|
2135
|
|
2136 #This only creates an RSet for the IMPORT set
|
|
2137 #So if we re-run with a different analysis
|
|
2138 #tab2mage will have already been validated
|
|
2139 #So RSet generation will be skipped
|
|
2140 #We need to recreate the each non-import RSet for this norm analysis
|
|
2141
|
|
2142 #This also means the RSets are being created before the data has been imported
|
|
2143 #This avoids having to parse tab2mage each time but means we have an uncertain status of these Rsets
|
|
2144
|
|
2145 my $rset = $self->get_import_ResultSet($norm_anal, 'experimental_chip');
|
|
2146
|
|
2147 my @chips = ();
|
|
2148
|
|
2149 if (! $rset) {
|
|
2150 $self->log("All ExperimentalChips already have status:\t${logic_name}");
|
|
2151 } else { #Got data to normalise and import
|
|
2152 my @dbids;
|
|
2153 my $R_file = $self->get_dir("norm")."/${logic_name}.R";
|
|
2154 my $job_name = $self->experiment->name()."_${logic_name}";
|
|
2155 my $resultfile = $self->get_dir("norm")."/result.${logic_name}.txt";
|
|
2156 my $outfile = $self->get_dir("norm")."/${logic_name}.out";
|
|
2157
|
|
2158 #How do we get farm job output i.e. run time memusage
|
|
2159 #from interactive job?
|
|
2160 #This assumes R_PATH
|
|
2161 my $errfile = $self->get_dir("norm")."/${logic_name}.err";
|
|
2162
|
|
2163 #Let's build this better so we capture the farm output aswell as the job output.
|
|
2164 my $cmdline = "$ENV{'R_PATH'} --no-save < $R_file"; # >$errfile 2>&1";
|
|
2165 #-K option waits for job to complete
|
|
2166 my $bsub = "bsub -K -J $job_name ".$ENV{'R_BSUB_OPTIONS'}.
|
|
2167 " -e $errfile -o $outfile $ENV{'R_FARM_PATH'} CMD BATCH $R_file";
|
|
2168
|
|
2169 #Can we separate the out and err for commandline?
|
|
2170 my $r_cmd = (! $self->farm()) ? "$cmdline >$outfile 2>&1" : $bsub;
|
|
2171
|
|
2172 $self->backup_file($resultfile); #Need to do this as we're appending in the loop
|
|
2173
|
|
2174 #setup qurey
|
|
2175 #warn "Need to add host and port here";
|
|
2176 #Set up DB, defaults and libs for each logic name
|
|
2177 my $query = "options(scipen=20);library(RMySQL);library(Ringo);";
|
|
2178 #scipen is to prevent probe_ids being converted to exponents
|
|
2179 #Ringo is for default QC
|
|
2180
|
|
2181 #foreach my $ln(@logic_names){
|
|
2182
|
|
2183 foreach my $lib (@{$r_config{$logic_name}{'libs'}}) {
|
|
2184 $query .= "library($lib);";
|
|
2185 }
|
|
2186 #}
|
|
2187
|
|
2188 $query .= "con<-dbConnect(dbDriver(\"MySQL\"), host=\"".$self->db->dbc->host()."\", port=".$self->db->dbc->port().", dbname=\"".$self->db->dbc->dbname()."\", user=\"".$self->db->dbc->username()."\"";
|
|
2189
|
|
2190 #should use read only pass here as we are printing this to file
|
|
2191 $query .= ", pass=\"".$self->db->dbc->password."\")\n";
|
|
2192
|
|
2193
|
|
2194 #Build queries for each chip
|
|
2195 foreach my $echip (@{$self->experiment->get_ExperimentalChips()}) {
|
|
2196
|
|
2197
|
|
2198 #should implement logic name here?
|
|
2199 #can't as we need seperate ResultSet for each
|
|
2200
|
|
2201
|
|
2202 if ($echip->has_status($logic_name)) {
|
|
2203 $self->log("ExperimentalChip ".$echip->unique_id()." already has status:\t$logic_name");
|
|
2204 } else {
|
|
2205
|
|
2206 #warn "Need to roll back here if recovery, as norm import may fail halfway through";
|
|
2207
|
|
2208 push @chips, $echip;
|
|
2209 my $cc_id = $rset->get_chip_channel_id($echip->dbID());
|
|
2210
|
|
2211 #if ($self->recovery()){
|
|
2212 # $self->log('Rolling back results for ExperimentalChip('.$echip->dbID().") $logic_name");
|
|
2213 # $self->db->rollback_results($cc_id) if $self->recovery();
|
|
2214 # }
|
|
2215
|
|
2216 $self->log("Building $logic_name R cmd for ".$echip->unique_id());
|
|
2217 @dbids = ();
|
|
2218
|
|
2219 foreach my $chan (@{$echip->get_Channels()}) {
|
|
2220
|
|
2221 if ($chan->type() eq "EXPERIMENTAL") {
|
|
2222 push @dbids, $chan->dbID();
|
|
2223 } else {
|
|
2224 unshift @dbids, $chan->dbID();
|
|
2225 }
|
|
2226 }
|
|
2227
|
|
2228 throw("vsn does not accomodate more than 2 channels") if (scalar(@dbids > 2) && $logic_name eq "VSN_GLOG");
|
|
2229
|
|
2230 #should do some of this with maps?
|
|
2231 #HARDCODED metric ID for raw data as one
|
|
2232
|
|
2233 #Need to get total and experimental here and set db_id accordingly
|
|
2234 #can probably do this directly into one df
|
|
2235
|
|
2236 $query .= "c1<-dbGetQuery(con, 'select r.probe_id as PROBE_ID, r.score as CONTROL_score, r.X, r.Y from result r, chip_channel c, result_set rs where c.table_name=\"channel\" and c.table_id=${dbids[0]} and c.result_set_id=rs.result_set_id and rs.analysis_id=${ra_id} and c.chip_channel_id=r.chip_channel_id')\n";
|
|
2237
|
|
2238 $query .= "c2<-dbGetQuery(con, 'select r.probe_id as PROBE_ID, r.score as EXPERIMENTAL_score, r.X, r.Y from result r, chip_channel c, result_set rs where c.table_name=\"channel\" and c.table_id=${dbids[1]} and c.result_set_id=rs.result_set_id and rs.analysis_id=${ra_id} and c.chip_channel_id=r.chip_channel_id')\n";
|
|
2239
|
|
2240 #Can we define some of the basic structures here and reuse in the QC and each norm method?
|
|
2241
|
|
2242
|
|
2243 #Is this going to eat up memory?
|
|
2244 #can we strip out and separate the data from c1 and c2 into RGList and
|
|
2245 #individual vector for probe_ids, then rm c1 and c2 to free up memory
|
|
2246
|
|
2247 #create RGList object
|
|
2248 $query .= "R<-as.matrix(c1['CONTROL_score'])\nG<-as.matrix(c2['EXPERIMENTAL_score'])\n";
|
|
2249 $query .= "genes<-cbind(c1['PROBE_ID'], c1['X'], c1['Y'])\n";
|
|
2250 $query .= "testRG<-new('RGList', list(R=R, G=G, genes=genes))\n";
|
|
2251
|
|
2252
|
|
2253 #QC plots here before doing norm
|
|
2254
|
|
2255 #open pdf device
|
|
2256 $query .= "pdf('".$self->get_dir('norm').'/'.$echip->unique_id."_QC.pdf', paper='a4', height = 15, width = 9)\n";
|
|
2257 #set format
|
|
2258 $query .= "par(mfrow = c(2,2), font.lab = 2)\n";
|
|
2259
|
|
2260 #Channel densisties
|
|
2261 #These need limma or Ringo
|
|
2262 $query .= "plotDensities(testRG)\n";
|
|
2263
|
|
2264 #MvA Plot
|
|
2265
|
|
2266 $query .= 'meanLogA<-((log(testRG$R, base=exp(2)) + log(testRG$G, base=exp(2)))/2)'."\n";
|
|
2267 $query .= 'logIntRatioM<-(log(testRG$R, base=exp(2)) - log(testRG$G, base=exp(2)))'."\n";
|
|
2268 $query .= "yMin<-min(logIntRatioM)\n";
|
|
2269 $query .= "yMax<-max(logIntRatioM)\n";
|
|
2270
|
|
2271
|
|
2272
|
|
2273 #Need to validate yMax here
|
|
2274 #If is is Inf then we need to sort the vector and track back until we find the high real number
|
|
2275 #count number of Infs and note on MvA plot
|
|
2276 $query .= "infCount<-0\n";
|
|
2277 $query .= "if( yMax == Inf){; sortedM<-sort(logIntRatioM); lengthM<-length(logIntRatioM); indexM<-lengthM\n"
|
|
2278 ."while (yMax == Inf){; indexM<-(indexM-1); yMax<-sortedM[indexM];}; infCount<-(lengthM-indexM);}\n";
|
|
2279
|
|
2280 #
|
|
2281 $query .= "if(infCount == 0){\n";
|
|
2282 $query .= 'plot(meanLogA, logIntRatioM, xlab="A - Average Log Ratio",ylab="M - Log Ratio",pch=".",ylim=c(yMin,yMax), main="'.$echip->unique_id.'")'."\n";
|
|
2283 $query .= "} else {\n";
|
|
2284 $query .= 'plot(meanLogA, logIntRatioM, xlab="A - Average Log Ratio",ylab="M - Log Ratio",pch=".",ylim=c(yMin,yMax), main="'.$echip->unique_id.'", sub=paste(infCount, " Inf values not plotted"));'."}\n";
|
|
2285
|
|
2286
|
|
2287 #$query .= 'plot(log(testRG$R*testRG$G, base=exp(2))/2, log(testRG$R/testRG$G, base=exp(2)),xlab="A",ylab="M",pch=".",ylim=c(-3,3), main="'.$echip->unique_id."\")\n";
|
|
2288
|
|
2289 #Plate plots
|
|
2290 $query .= 'image(testRG, 1, channel = "green", mycols = c("black", "green4", "springgreen"))'."\n";
|
|
2291 $query .= 'image(testRG, 1, channel = "red", mycols = c("black", "green4", "springgreen"))'."\n";
|
|
2292
|
|
2293 $query .= "dev.off()\n";
|
|
2294 #Finished QC pdf printing
|
|
2295
|
|
2296
|
|
2297
|
|
2298 #The simple preprocess step of Ringo is actually vsn, so we can nest these in line
|
|
2299
|
|
2300
|
|
2301 ### Build Analyses cmds ###
|
|
2302
|
|
2303 if ($logic_name eq 'T.Biweight') {
|
|
2304
|
|
2305 #log2 ratios
|
|
2306 $query .= 'lr_df<-cbind((log(c2["EXPERIMENTAL_score"], base=exp(2)) - log(c1["CONTROL_score"], base=exp(2))))'."\n";
|
|
2307
|
|
2308 #Adjust using tukey.biweight weighted average
|
|
2309 #inherits first col name
|
|
2310 $query .= 'norm_df<-(lr_df["EXPERIMENTAL_score"]-tukey.biweight(as.matrix(lr_df)))'."\n";
|
|
2311 $query .= 'formatted_df<-cbind(rep.int(0, length(c1["PROBE_ID"])), c1["PROBE_ID"], sprintf("%.3f", norm_df[,1]), rep.int('.$cc_id.', length(c1["PROBE_ID"])), c1["X"], c1["Y"])'."\n";
|
|
2312
|
|
2313 } elsif ($logic_name eq 'VSN_GLOG') {
|
|
2314 #could do this directly
|
|
2315 $query .= "raw_df<-cbind(c1[\"CONTROL_score\"], c2[\"EXPERIMENTAL_score\"])\n";
|
|
2316 #variance stabilise
|
|
2317 $query .= "norm_df<-vsn(raw_df)\n";
|
|
2318
|
|
2319
|
|
2320 #do some more calcs here and print report?
|
|
2321 #fold change exponentiate? See VSN docs
|
|
2322 #should do someplot's of raw and glog and save here?
|
|
2323 #set log func and params
|
|
2324 #$query .= "par(mfrow = c(1, 2)); log.na = function(x) log(ifelse(x > 0, x, NA));";
|
|
2325 #plot
|
|
2326 #$query .= "plot(exprs(glog_df), main = \"vsn\", pch = \".\");".
|
|
2327 # "plot(log.na(exprs(raw_df)), main = \"raw\", pch = \".\");";
|
|
2328 #FAILS ON RAW PLOT!!
|
|
2329 #par(mfrow = c(1, 2))
|
|
2330 #> meanSdPlot(nkid, ranks = TRUE)
|
|
2331 #> meanSdPlot(nkid, ranks = FALSE)
|
|
2332
|
|
2333
|
|
2334 #Now create table structure with glog values(diffs)
|
|
2335 #3 sig dec places on scores(doesn't work?!)
|
|
2336 $query .= 'formatted_df<-cbind(rep.int(0, length(c1["PROBE_ID"])), c1["PROBE_ID"], sprintf("%.3f", (exprs(norm_df[,2]) - exprs(norm_df[,1]))), rep.int('.$cc_id.', length(c1["PROBE_ID"])), c1["X"], c1["Y"])'."\n";
|
|
2337
|
|
2338 }
|
|
2339 #load back into DB
|
|
2340 #c3results<-cbind(rep("", length(c3["probe_id"])), c3["probe_id"], c3["c3_score"], rep(1, length(c3["probe_id"])), rep(1, length(c3["probe_id"])))
|
|
2341 #may want to use safe.write here
|
|
2342 #dbWriteTable(con, "result", c3results, append=TRUE)
|
|
2343 #dbWriteTable returns true but does not load any data into table!!!
|
|
2344
|
|
2345 $query .= "write.table(formatted_df, file=\"${resultfile}\", sep=\"\\t\", col.names=FALSE, row.names=FALSE, quote=FALSE, append=TRUE)\n";
|
|
2346
|
|
2347 #tidy up here??
|
|
2348 }
|
|
2349 }
|
|
2350
|
|
2351 $query .= "q();";
|
|
2352
|
|
2353 open(RFILE, ">$R_file") || throw("Cannot open $R_file for writing");
|
|
2354 print RFILE $query;
|
|
2355 close(RFILE);
|
|
2356
|
|
2357 my $submit_text = "Submitting $logic_name job";
|
|
2358 $submit_text .= ' to farm' if $self->farm;
|
|
2359 $self->log("${submit_text}:\t".localtime());
|
|
2360 run_system_cmd($r_cmd);
|
|
2361 $self->log("Finished $logic_name job:\t".localtime());
|
|
2362 $self->log('See '.$self->get_dir('norm').' for ExperimentalChip QC files');
|
|
2363
|
|
2364 #Now load file and update status
|
|
2365 #Import directly here to avoid having to reparse all results if we crash!!!!
|
|
2366 $self->log("Importing:\t$resultfile");
|
|
2367 $self->db->load_table_data("result", $resultfile);
|
|
2368 $self->log("Finishing importing:\t$resultfile");
|
|
2369
|
|
2370
|
|
2371 foreach my $echip (@chips) {
|
|
2372 $echip->adaptor->store_status($logic_name, $echip);
|
|
2373 }
|
|
2374
|
|
2375 #Recreate all non-import RSets for analysis if not already present
|
|
2376 #
|
|
2377
|
|
2378 my $rset_a = $self->db->get_ResultSetAdaptor();
|
|
2379 my %seen_rsets;
|
|
2380
|
|
2381 foreach my $anal_rset (@{$rset_a->fetch_all_by_Experiment($self->experiment)}) {
|
|
2382 next if($anal_rset->name =~ /_IMPORT$/o);
|
|
2383 next if(exists $seen_rsets{$anal_rset->name});
|
|
2384 next if $anal_rset->analysis->logic_name eq $norm_anal->logic_name;
|
|
2385 $seen_rsets{$rset->name} = 1;
|
|
2386 $anal_rset->analysis($norm_anal);
|
|
2387 $anal_rset->{'dbID'} = undef;
|
|
2388 $anal_rset->{'adaptor'} = undef;
|
|
2389
|
|
2390 #add the chip_channel_ids from the new anal IMPORT set
|
|
2391 foreach my $table_id (@{$anal_rset->table_ids}) {
|
|
2392 $anal_rset->{'table_id_hash'}{$table_id} = $rset->get_chip_channel_id($table_id);
|
|
2393 }
|
|
2394
|
|
2395 $self->log('Adding new ResultSet '.$anal_rset->name.' with analysis '.$norm_anal->logic_name);
|
|
2396 $rset_a->store($anal_rset);
|
|
2397 }
|
|
2398
|
|
2399
|
|
2400
|
|
2401 }
|
|
2402 }
|
|
2403
|
|
2404 return;
|
|
2405 }
|
|
2406
|
|
2407 #can we sub this? args: table_name, logic_name
|
|
2408 #also use result_set_name
|
|
2409 #would also clean all data for result set if recovery
|
|
2410 #return would be result_set
|
|
2411 #Can we extend this to incorporate InputSet parser define_sets?
|
|
2412
|
|
2413 sub get_import_ResultSet{
|
|
2414 my ($self, $anal, $table_name) = @_;
|
|
2415
|
|
2416 if (!($anal && $anal->isa("Bio::EnsEMBL::Analysis") && $anal->dbID())) {
|
|
2417 throw("Must provide a valid stored Bio::EnsEMBL::Analysis");
|
|
2418 }
|
|
2419
|
|
2420 $self->log("Getting import $table_name ResultSet for analysis:\t".$anal->logic_name());
|
|
2421
|
|
2422 my ($rset, @new_chip_channels);
|
|
2423 my $result_adaptor = $self->db->get_ResultSetAdaptor();
|
|
2424 my $logic_name = $anal->logic_name;
|
|
2425 my $status = ($logic_name eq "RawValue") ? "IMPORTED" : $logic_name;
|
|
2426
|
|
2427 if (($logic_name) eq 'RawValue' && ($table_name eq 'experimental_chip')) {
|
|
2428 throw("Cannot have an ExperimentalChip ResultSet with a RawValue analysis, either specify 'channel' or another analysis");
|
|
2429 }
|
|
2430
|
|
2431 #Build IMPORT Set for $table_name
|
|
2432 foreach my $echip (@{$self->experiment->get_ExperimentalChips()}) {
|
|
2433
|
|
2434 #clean chip import and generate rset
|
|
2435
|
|
2436 if ($table_name eq 'experimental_chip') {
|
|
2437
|
|
2438 if ($echip->has_status($status)) { #this translates to each channel have the IMPORTED_RawValue status
|
|
2439 $self->log("ExperimentalChip(".$echip->unique_id().") already has status:\t".$status);
|
|
2440 } else {
|
|
2441 $self->log("Found ExperimentalChip(".$echip->unique_id().") without status $status");
|
|
2442
|
|
2443 push @new_chip_channels, $echip;
|
|
2444 }
|
|
2445
|
|
2446 } else { #channel
|
|
2447
|
|
2448 foreach my $chan (@{$echip->get_Channels()}) {
|
|
2449
|
|
2450 if ($chan->has_status($status)) { #this translates to each channel have the IMPORTED_RawValue status
|
|
2451 $self->log("Channel(".$echip->unique_id()."_".$self->get_config('dye_freqs')->{$chan->dye()}.") already has status:\t".$status);
|
|
2452 } else {
|
|
2453 $self->log("Found Channel(".$echip->unique_id()."_".$self->get_config('dye_freqs')->{$chan->dye()}.") without status $status");
|
|
2454 push @new_chip_channels, $chan;
|
|
2455 }
|
|
2456 }
|
|
2457 }
|
|
2458
|
|
2459 if (( ! $rset) && @new_chip_channels) {
|
|
2460 my(@tmp) = @{$result_adaptor->fetch_all_by_name_Analysis($self->name()."_IMPORT", $anal)};
|
|
2461
|
|
2462 if (scalar(@tmp) > 1) {
|
|
2463 throw('Found more than one IMPORT ResultSet for '.$self->name().'_IMPORT with analysis '.$logic_name);
|
|
2464 }
|
|
2465
|
|
2466 $rset = shift @tmp;
|
|
2467
|
|
2468
|
|
2469 #do we need to throw here if not recovery?
|
|
2470 #what if we want the import result set elsewhere during the first import?
|
|
2471
|
|
2472 #if ($self->recovery()) {
|
|
2473
|
|
2474 #fetch by anal and experiment_id
|
|
2475 #Need to change this to result_set.name!
|
|
2476 # warn("add chip set handling here");
|
|
2477
|
|
2478 #my @tmp = @{$result_adaptor->fetch_all_by_Experiment_Analysis($self->experiment(), $anal)};
|
|
2479 #throw("Found more than one ResultSet for Experiment:\t".$self->experiment->name()."\tAnalysis:\t".$anal->logic_name().')' if (scalar(@tmp) >1);
|
|
2480 #$rset = $tmp[0];
|
|
2481
|
|
2482 #warn "fetching rset with ".$self->name()."_IMPORT ". $anal->logic_name;
|
|
2483
|
|
2484 #$rset = $result_adaptor->fetch_by_name_Analysis($self->name()."_IMPORT", $anal);
|
|
2485 warn("Warning: Could not find recovery ResultSet for analysis ".$logic_name) if ! $rset;
|
|
2486 #}
|
|
2487
|
|
2488 if (! $rset) {
|
|
2489 $self->log("Generating new ResultSet for analysis ".$logic_name);
|
|
2490
|
|
2491 $rset = Bio::EnsEMBL::Funcgen::ResultSet->new
|
|
2492 (
|
|
2493 -analysis => $anal,
|
|
2494 -table_name => $table_name,
|
|
2495 -name => $self->name()."_IMPORT",
|
|
2496 -feature_type => $self->feature_type(),
|
|
2497 -cell_type => $self->cell_type(),
|
|
2498 );
|
|
2499
|
|
2500 #These types should be set to NULL during the MAGE-XML validation if we have more than one type in an experiment
|
|
2501
|
|
2502 ($rset) = @{$result_adaptor->store($rset)};
|
|
2503 }
|
|
2504 }
|
|
2505 }
|
|
2506
|
|
2507 #do we need this here as we're rolling back in the read methods?
|
|
2508 #we only want to roll back those chips/channels which have not been registered
|
|
2509
|
|
2510 if ($self->recovery()) {
|
|
2511
|
|
2512 my $ec_adaptor = $self->db->get_ExperimentalChipAdaptor();
|
|
2513
|
|
2514 foreach my $cc (@new_chip_channels) {
|
|
2515
|
|
2516 #only roll back if already part of import set
|
|
2517 #Not previously registered if not
|
|
2518 if ($rset->contains($cc) && $rset->get_chip_channel_id($cc->dbID())) {
|
|
2519
|
|
2520 if ($table_name eq 'channel') {
|
|
2521 my $chan_name = $ec_adaptor->fetch_by_dbID($cc->experimental_chip_id())->unique_id()."_".
|
|
2522 $self->get_config('dye_freqs')->{$cc->dye()};
|
|
2523 $self->log("Rolling back results for $table_name:\t".$chan_name);
|
|
2524
|
|
2525 } else {
|
|
2526 $self->log("Rolling back results for $table_name:\t".$cc->unique_id);
|
|
2527 }
|
|
2528
|
|
2529 $self->rollback_results([$rset->get_chip_channel_id($cc->dbID())]);
|
|
2530 }
|
|
2531 }
|
|
2532 }
|
|
2533
|
|
2534
|
|
2535 #check whether it is present in the ResultSet and add if not
|
|
2536 if ($rset) {
|
|
2537 #ids will already be present if not rset i.e. already imported
|
|
2538
|
|
2539 foreach my $cc (@new_chip_channels) {
|
|
2540 $rset->add_table_id($cc->dbID()) if(! $rset->contains($cc));
|
|
2541 }
|
|
2542 }
|
|
2543
|
|
2544
|
|
2545
|
|
2546 if ($rset) {
|
|
2547 $result_adaptor->store_chip_channels($rset);
|
|
2548 } else {
|
|
2549 $self->log("All ExperimentalChips have status:\t$status");
|
|
2550 }
|
|
2551
|
|
2552 #this only returns a result set if there is new data to import
|
|
2553 return $rset;
|
|
2554 }
|
|
2555
|
|
2556
|
|
2557
|
|
2558 =head2 resolve_probe_data
|
|
2559
|
|
2560 Example : $self->resolve_probe_data();
|
|
2561 Description: Resolves DB probe duplicates and builds local probe cache
|
|
2562 Returntype : none
|
|
2563 Exceptions : ????
|
|
2564 Caller : general
|
|
2565 Status : At risk
|
|
2566
|
|
2567 =cut
|
|
2568
|
|
2569 sub resolve_probe_data{
|
|
2570 my $self = shift;
|
|
2571
|
|
2572 $self->log("Resolving probe data", 1);
|
|
2573
|
|
2574 warn "Probe cache resolution needs to accomodate probesets too!";
|
|
2575
|
|
2576 foreach my $array (@{$self->arrays()}) {
|
|
2577 my $resolve = 0;
|
|
2578
|
|
2579 if ($self->get_probe_cache_by_Array($array)) { #cache already generated
|
|
2580
|
|
2581 #check if we have any new unresolved array chips to add to the cache
|
|
2582 foreach my $achip (@{$array->get_ArrayChips()}) {
|
|
2583
|
|
2584 if ($achip->has_status('RESOLVED')) {
|
|
2585 $self->log("ArrayChip has RESOLVED status:\t".$achip->design_id()); #, 1);
|
|
2586 next;
|
|
2587 } else {
|
|
2588 $self->log("Found un-RESOLVED ArrayChip:\t".$achip->design_id());
|
|
2589 $resolve = 1;
|
|
2590 last;
|
|
2591 }
|
|
2592 }
|
|
2593 } else { #no cache file
|
|
2594 $resolve = 1;
|
|
2595 $self->log('No probe cache found for array '.$array->name());
|
|
2596 }
|
|
2597
|
|
2598 if ($resolve) {
|
|
2599 $self->log('Resolving array duplicates('.$array->name().') and rebuilding probe cache.', 1);
|
|
2600 $self->get_probe_cache_by_Array($array, 1); #get from DB
|
|
2601
|
|
2602 #we need ot make sure we mark cache as unresolved, so we don't use it by mistake.
|
|
2603
|
|
2604
|
|
2605
|
|
2606 my ($line, $name, $pid, @pids);
|
|
2607 #my $index = 0;
|
|
2608 my $tmp_name = '';
|
|
2609 my $tmp_id = '';
|
|
2610
|
|
2611 #miss the header
|
|
2612
|
|
2613 while ($line = $self->{'_probe_cache'}{$array->name}{'handle'}->getline()) {
|
|
2614 ($name, $pid) = split/\t/o, $line;
|
|
2615
|
|
2616 if ($name eq $tmp_name) {
|
|
2617
|
|
2618 if ($pid != $tmp_id) {
|
|
2619 push @pids, $pid;
|
|
2620 #should reset to pid here if we have x y data else undef
|
|
2621 #ignore this and force result to have x y
|
|
2622 }
|
|
2623
|
|
2624 #can't do this naymore unless we figure out how to move the line pointer
|
|
2625 #would still need to sed the file anyway, better to regen from DB?
|
|
2626 #undef $self->{'_probe_cache'}{$array->name}{'entries'}->[$i];#delete true or to be resolved duplicate
|
|
2627 } elsif ($name ne $tmp_name) { #new probe
|
|
2628 $self->tidy_duplicates(\@pids) if(scalar(@pids) > 1);
|
|
2629 $tmp_name = $name;
|
|
2630 $tmp_id = $pid;
|
|
2631 @pids = ($pid);
|
|
2632 #$index = $i + 1;
|
|
2633 }
|
|
2634 }
|
|
2635
|
|
2636 $self->tidy_duplicates(\@pids) if(scalar(@pids) > 1);
|
|
2637
|
|
2638 #rename resovled cache and reset cache handle
|
|
2639 my $cmd = 'mv '.$self->get_dir('caches').'/'.$array->name().'.probe_cache.unresolved '.
|
|
2640 $self->get_dir('caches').'/'.$array->name().'.probe_cache';
|
|
2641
|
|
2642 run_system_cmd($cmd);
|
|
2643 $self->get_probe_cache_by_Array($array); #This sets the caches
|
|
2644
|
|
2645
|
|
2646 #warn "Only generate MD5 here, as this is guranteed to be correct";
|
|
2647
|
|
2648 foreach my $achip (@{$array->get_ArrayChips()}) {
|
|
2649
|
|
2650 if (! $achip->has_status('RESOLVED')) {
|
|
2651 $self->log("Updating ArrayChip to RESOLVED status:\t".$achip->design_id());
|
|
2652 $achip->adaptor->store_status('RESOLVED', $achip);
|
|
2653 }
|
|
2654 }
|
|
2655
|
|
2656 $self->log('Finished building probe cache for '.$array->name(), 1);
|
|
2657 }
|
|
2658 }
|
|
2659
|
|
2660 $self->log('Finished resolving probe data', 1);
|
|
2661
|
|
2662 return;
|
|
2663 }
|
|
2664
|
|
2665
|
|
2666 sub tidy_duplicates{
|
|
2667 my ($self, $pids) = @_;
|
|
2668
|
|
2669 my $pfa = $self->db->get_ProbeFeatureAdaptor();
|
|
2670 my ($feature, %features);
|
|
2671
|
|
2672 foreach my $dup_id (@$pids) {
|
|
2673
|
|
2674 foreach $feature(@{$pfa->fetch_all_by_Probe_id($dup_id)}) {
|
|
2675 #can we safely assume end will be same too?
|
|
2676 push @{$features{$feature->seq_region_name().':'.$feature->start()}}, $feature;
|
|
2677 }
|
|
2678 }
|
|
2679
|
|
2680 my (@reassign_ids, @delete_ids);
|
|
2681
|
|
2682 foreach my $seq_start_key (keys %features) {
|
|
2683 my $reassign_features = 1;
|
|
2684
|
|
2685 foreach $feature(@{$features{$seq_start_key}}) {
|
|
2686
|
|
2687 if ($feature->probe_id() == $pids->[0]) {
|
|
2688 $reassign_features = 0;
|
|
2689 } else {
|
|
2690 push @delete_ids, $feature->dbID();
|
|
2691 }
|
|
2692 }
|
|
2693
|
|
2694 #This assumes that we actually have at least one element to every seq_start_key array
|
|
2695 if ($reassign_features) {
|
|
2696 my $new_fid = pop @delete_ids;
|
|
2697 push @reassign_ids, $new_fid;
|
|
2698 }
|
|
2699 }
|
|
2700
|
|
2701 #resolve features first so we don't get any orphaned features if we crash.
|
|
2702 $pfa->reassign_features_to_probe(\@reassign_ids, $pids->[0]) if @reassign_ids;
|
|
2703 $pfa->delete_features(\@delete_ids) if @delete_ids;
|
|
2704
|
|
2705 return;
|
|
2706 }
|
|
2707
|
|
2708 1;
|