comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/Nimblegen.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 #
2 # EnsEMBL module for Bio::EnsEMBL::Funcgen::Parsers::Nimblegen
3 #
4
5 =head1 LICENSE
6
7 Copyright (c) 1999-2011 The European Bioinformatics Institute and
8 Genome Research Limited. All rights reserved.
9
10 This software is distributed under a modified Apache license.
11 For license details, please see
12
13 http://www.ensembl.org/info/about/code_licence.html
14
15 =head1 CONTACT
16
17 Please email comments or questions to the public Ensembl
18 developers list at <ensembl-dev@ebi.ac.uk>.
19
20 Questions may also be sent to the Ensembl help desk at
21 <helpdesk@ensembl.org>.
22
23 =head1 NAME
24
25 Bio::EnsEMBL::Funcgen::Parsers::Nimblegen
26
27 =head1 SYNOPSIS
28
29 my $parser_type = "Bio::EnsEMBL::Funcgen::Parsers::Nimblegen";
30 push @INC, $parser_type;
31 my $imp = $class->SUPER::new(@_);
32
33
34 =head1 DESCRIPTION
35
36 This is a parser class which should not be instatiated directly, it
37 normally set by the Importer as the parent class. Nimblegen contains meta
38 data and methods specific to NimbleGen arrays to aid parsing and importing of
39 experimental data.
40
41 =cut
42
43 package Bio::EnsEMBL::Funcgen::Parsers::Nimblegen;
44
45 use Bio::EnsEMBL::Funcgen::Array;
46 use Bio::EnsEMBL::Funcgen::ProbeSet;
47 use Bio::EnsEMBL::Funcgen::Probe;
48 use Bio::EnsEMBL::Funcgen::ProbeFeature;
49 use Bio::EnsEMBL::Funcgen::FeatureType;
50 use Bio::EnsEMBL::Funcgen::ExperimentalChip;
51 use Bio::EnsEMBL::Funcgen::ArrayChip;
52 use Bio::EnsEMBL::Funcgen::Channel;
53 use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate );
54 use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw(species_chr_num open_file);
55 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
56 use Bio::EnsEMBL::Funcgen::Parsers::MAGE;
57
58 use strict;
59
60 use vars qw(@ISA);
61 @ISA = qw(Bio::EnsEMBL::Funcgen::Parsers::MAGE);
62
63 =head2 new
64
65 Example : my $self = $class->SUPER::new(@_);
66 Description: Constructor method for Nimblegen class
67 Returntype : Bio::EnsEMBL::Funcgen::Parser::Nimblegen
68 Exceptions : throws if Experiment name not defined or if caller is not Importer
69 Caller : Bio::EnsEMBL::Funcgen::Importer
70 Status : at risk
71
72 =cut
73
74
75 sub new{
76 my $caller = shift;
77
78 my $class = ref($caller) || $caller;
79 my $self = $class->SUPER::new(@_);
80
81 throw("This is a skeleton class for Bio::EnsEMBL::Importer, should not be used directly") if(! $self->isa("Bio::EnsEMBL::Funcgen::Importer"));
82
83 #should we provide args override for all of these?
84
85 $self->{'config'} =
86 {(
87 #order of these data arrays is important!
88 #Remove these method arrays, snd just run them serially?
89 array_data => ['experiment'],#Rename this!!
90 probe_data => ["probe"],
91 results_data => ["and_import_results"],
92 sample_key_fields => ['DESIGN_ID', 'CHIP_ID', 'DYE', 'PROMOT_SAMPLE_TYPE'],# 'SAMPLE_LABEL'],label now optional
93 # 'SAMPLE_DESCRIPTION removed due to naming disparities
94 ndf_fields => ['CONTAINER', 'PROBE_SEQUENCE', 'MISMATCH','FEATURE_ID', 'PROBE_ID'],#MISMATCH is always 0!
95 pos_fields => ['CHROMOSOME', 'PROBE_ID', 'POSITION', 'COUNT'],
96 result_fields => ['PROBE_ID', 'PM', 'X', 'Y'],
97 notes_fields => ['DESIGN_ID', 'DESIGN_NAME', 'DESCRIPTION'],
98 norm_method => 'VSN_GLOG',
99 dye_freqs => {(
100 Cy5 => 635,
101 Cy3 => 532,
102 )},
103
104
105 #Need to make these definable?
106 #have protocolfile arg and just parse tab2mage protocol section format
107 protocols => {(
108 grow => {(
109 accession => 'GROW_NIMB',
110 name => 'GROW NIMBLEGEN CULTURE CONDITIONS',
111 text => 'Nimblegen culture conditions description here. Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
112 paramters => undef,
113 )},
114 treatment => {(
115 accession => 'CROSSLINK_NIMB',
116 name => 'NIMBLEGEN CHROMATIN PREPARATION',
117 text => 'Nimblegen X-linking and DNA extraction protocol.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
118 paramters => undef,
119 )},
120 extraction => {(
121 accession => 'CHROMATIN_IP_NIMB',
122 name => 'NIMBLEGEN CHROMATIN IMMUNOPRECIPITATION and DNA RECOVERY',
123 text => 'Nimblegen chromatin immunoprecipitation and DNA extraction protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
124 paramters => undef,
125 )},
126 labeling => {(
127 accession => 'LABELLING_NIMB',
128 name => 'NIMBLEGEN LABELLING',
129 text => 'Nimblegen labelling protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
130 paramteres => undef,
131 )},
132 hybridization => {(
133 accession => 'HYBRIDISATION_NIMB',
134 name => 'NIMBLEGEN HYBRIDISATION',
135 text => 'Nimblegen chip hybridisation protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
136 parameters => undef,
137 )},
138 scanning => {(
139 accession => 'SCANNING_NIMB',
140 name => 'NIMBLESCAN',
141 text => 'Nimblegen Nimblescan protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
142 paramters => undef,
143 )},
144 )},
145
146 )};
147
148 return $self;
149 }
150
151
152 =head2 set_config
153
154 Example : my $self->set_config;
155 Description: Sets attribute dependent config
156 Returntype : None
157 Exceptions : None
158 Caller : Bio::EnsEMBL::Funcgen::Importer
159 Status : at risk
160
161 =cut
162
163
164 sub set_config{
165 my $self = shift;
166
167 #This should be general config for all types of import
168 #dirs are not set in config to enable generic get_dir method access
169 #This is really just setting paths rather than config rename?
170
171 #This is generic for all imports
172
173
174
175 if($self->{'old_dvd_format'}){
176 $self->{'design_dir'} = $self->get_dir('input').'/DesignFiles';
177 }else{
178 $self->{'design_dir'} = $self->get_dir('input').'/Design_information';
179 }
180
181
182 if($self->{'old_dvd_format'}){
183 $self->{'config'}{'notes_file'} = $self->get_dir('input').'/DesignNotes.txt';
184 }else{
185 $self->{'config'}{'notes_file'} = $self->get_dir('design').'/DesignNotes.txt';
186 }
187
188 $self->{'config'}{'chip_file'} = $self->get_dir('input').'/SampleKey.txt';
189
190
191
192 #Experiment(output) specific
193 #This should already be set in the run script
194 #As we could get log write errors before we have created the output dir otherwise
195 $self->{'output_dir'} ||= $self->get_dir("data").'/output/'.$self->{'param_species'}.'/'.$self->vendor().'/'.$self->name();
196
197 $self->{'config'}{'tab2mage_file'} = $self->get_dir('output').'/E-TABM-'.$self->name().'.txt';
198
199 $self->{'config'}{'mage_xml_file'} = $self->get_dir('output').'/{UNASSIGNED}.xml';
200
201 if($self->{'old_dvd_format'}){
202 $self->{'results_dir'} = $self->get_dir('input').'/PairData';
203 }else{
204 $self->{'results_dir'} = $self->get_dir('input').'/Raw_data_files';
205 }
206
207 return;
208 }
209
210
211
212
213 =head2 read_array_data
214
215 Example : $imp->read_array_data();
216 Description: Parses NimbleGen DesignNotes.txt files to create and store new Arrays
217 Returntype : none
218 Exceptions : None
219 Caller : general
220 Status : At risk - Can this be generic? Can we force the creation of a DesignNotes file on other formats?
221
222 =cut
223
224
225 sub read_array_data{
226 my ($self, $notes_file) = @_;
227
228
229 $notes_file ||= $self->get_config('notes_file');
230 my ($line, $array, $array_chip, @data, %hpos);
231 my $oa_adaptor = $self->db->get_ArrayAdaptor();
232 my $ac_adaptor = $self->db->get_ArrayChipAdaptor();
233
234 #Slurp file to string, sets local delimtter to null and subs new lines
235 my $fh = open_file($notes_file);
236 #($design_desc = do { local ($/); <$fh>;}) =~ s/\r*\n$//;
237 #close($fh);
238
239 #Would be better if we only import the design info for the chips listed in the SampleKey.txt file
240 #Some cyclical dependency going on here :|
241
242 while ($line = <$fh>){
243
244 $line =~ s/\r*\n//;#chump
245 @data = split/\t/o, $line;
246
247 #We need to have a DESIGN vendor type?
248 #also need to be able to set file path independently of config
249
250 if($. == 1){
251 %hpos = %{$self->set_header_hash(\@data, $self->get_config('notes_fields'))};
252 next;
253 }
254
255 ### CREATE AND STORE Array and ArrayChips
256 if(! defined $array ){
257 #This is treating each array chip as a separate array, unless arrayset is defined
258 #AT present we have no way of differentiating between different array_chips on same array???!!!
259 #Need to add functionality afterwards to collate array_chips into single array
260
261 #This will use a stored array if present
262
263 $array = Bio::EnsEMBL::Funcgen::Array->new
264 (
265 -NAME => $self->array_name() || $data[$hpos{'DESIGN_NAME'}],
266 -FORMAT => uc($self->format()),
267 -VENDOR => uc($self->vendor()),
268 -TYPE => 'OLIGO',
269 -DESCRIPTION => $data[$hpos{'DESCRIPTION'}],#need to trim the array chip specific description here
270 );
271
272 ($array) = @{$oa_adaptor->store($array)};
273
274 $array_chip = Bio::EnsEMBL::Funcgen::ArrayChip->new(
275 -ARRAY_ID => $array->dbID(),
276 -NAME => $data[$hpos{'DESIGN_NAME'}],
277 -DESIGN_ID => $data[$hpos{'DESIGN_ID'}],
278 #add description?
279 );
280
281 #This will use a stored array_chip if present
282 ($array_chip) = @{$ac_adaptor->store($array_chip)};
283 $array->add_ArrayChip($array_chip);
284
285 }
286 elsif((! $array->get_ArrayChip_by_design_id($data[$hpos{'DESIGN_ID'}])) && ($self->array_set())){
287
288 $self->log("Generating new ArrayChip(".$data[$hpos{'DESIGN_NAME'}].") for same Array:\t".$array->name()."\n");
289
290 $array_chip = Bio::EnsEMBL::Funcgen::ArrayChip->new(
291 -ARRAY_ID => $array->dbID(),
292 -NAME => $data[$hpos{'DESIGN_NAME'}],
293 -DESIGN_ID => $data[$hpos{'DESIGN_ID'}],
294 );
295
296 ($array_chip) = @{$ac_adaptor->store($array_chip)};
297 $array->add_ArrayChip($array_chip);
298
299 }
300 elsif(! $array->get_ArrayChip_by_design_id($data[$hpos{'DESIGN_ID'}])){
301 throw("Found experiment with more than one design without -array_set");
302 }
303 }
304
305
306 $self->add_Array($array);
307
308 close($fh);
309
310 return;
311
312 }
313
314
315 =head2 read_experiment_data
316
317 Example : $imp->read_array_chip_data();
318 Description: Parses and imports array & experimental chip meta data/objects
319 Returntype : none
320 Exceptions : throws if more than one array/design found and not an "array set"
321 Caller : Importer
322 Status : At risk
323
324 =cut
325
326 sub read_experiment_data{
327 my $self = shift;
328
329 $self->read_array_data();
330
331 my $t2m_file = $self->init_tab2mage_export() if $self->{'write_mage'};
332
333
334 my ($design_desc, $line, $tmp_uid, $channel, $echip, $sample_label);
335 my ($sample_desc, %hpos, @data, %uid_reps, %did_reps, %sample_reps);
336 my $ec_adaptor = $self->db->get_ExperimentalChipAdaptor();
337 my $chan_adaptor = $self->db->get_ChannelAdaptor();
338 my $br_cnt = 1;
339 my $tr_cnt = 1;
340
341 #Currently 1 design = 1 chip = 1 array /DVD
342 #Different designs are not currently collated into a chip_set/array in any ordered manner
343 #Register each design as an array and an array_chip
344 #May want to group array_chips into array/chip sets by association though the API
345
346
347 warn("Harcoded for one array(can have multiple chips from the same array) per experiment\n");
348 my $fh = open_file($self->get_config("chip_file"));
349 $self->log("Reading chip data");
350
351
352 #warn "Do we need to validate each line here against the header array?";
353
354 while ($line = <$fh>){
355 next if $line =~ /^\s+\r*\n/;
356 $line =~ s/\r*\n//;#chump
357 @data = split/\t/o, $line;
358 #we could validate line against scalar of header array
359 #ORD_ID CHIP_ID DYE DESIGN_NAME DESIGN_ID SAMPLE_LABEL SAMPLE_SPECIES SAMPLE_DESCRIPTION TISSUE_TREATMENT PROMOT_SAMPLE_TYPE
360 if ($. == 1){
361 %hpos = %{$self->set_header_hash(\@data, $self->get_config('sample_key_fields'))};
362
363 #we need to set the sample description field name, as it can vary :(((
364 @data = grep(/SAMPLE_DESCRIPTION/, keys %hpos);
365 $sample_desc = $data[0];
366
367 throw("More than one sample description(@data) in ".$self->get_config("chip_file")."\n") if(scalar @data >1);
368 next;
369 }
370
371 #Need to handle array class here i.e. two channel arrays will have two lines
372 #validate species here
373 #look up alias from registry and match to self->species
374 #registry may not be loaded for local installation
375
376
377 ### CREATE AND STORE ExperimentalChips
378 if ((! $tmp_uid) || ($data[$hpos{'CHIP_ID'}] ne $tmp_uid)){
379
380 #Test both channels are available, i.e. the SampleKey has two TOTAL channels
381 if($echip){
382
383 for my $type('TOTAL', 'EXPERIMENTAL'){
384
385 my $test_chan = $chan_adaptor->fetch_by_type_experimental_chip_id($type, $echip->dbID());
386 throw("ExperimentalChip(".$echip->unique_id().
387 ") does not have a $type channel, please check the SampleKey.txt file") if ! $test_chan;
388
389 }
390 }
391
392 $tmp_uid = $data[$hpos{'CHIP_ID'}];
393 $echip = $ec_adaptor->fetch_by_unique_id_vendor($data[$hpos{'CHIP_ID'}], 'NIMBLEGEN');
394
395 if($echip){
396
397 if(! $self->recovery()){
398 throw("ExperimentalChip(".$echip->unqiue_id().
399 " already exists in the database\nMaybe you want to recover?");
400 }
401 }else{
402
403 $echip = Bio::EnsEMBL::Funcgen::ExperimentalChip->new
404 (
405 -EXPERIMENT_ID => $self->experiment->dbID(),
406 -DESCRIPTION => $data[$hpos{$sample_desc}],
407 -FEATURE_TYPE => $self->feature_type,
408 -CELL_TYPE => $self->cell_type,
409 -ARRAY_CHIP_ID => $self->arrays->[0]->get_ArrayChip_by_design_id($data[$hpos{'DESIGN_ID'}])->dbID(),
410 -UNIQUE_ID => $data[$hpos{'CHIP_ID'}],
411 #-BIOLOGICAL_REPLICATE => ,
412 #-TECHNICAL_REPLICATE => ,
413 );
414
415 ($echip) = @{$ec_adaptor->store($echip)};
416 $self->experiment->add_ExperimentalChip($echip);
417 }
418 }
419
420
421 ### CREATE AND STORE Channels
422 my $type = uc($data[$hpos{'PROMOT_SAMPLE_TYPE'}]);
423 my $sample_label = (! exists $hpos{'SAMPLE_LABEL'}) ? '' : $data[$hpos{'SAMPLE_LABEL'}];
424
425
426 $type = 'TOTAL' if ($type ne 'EXPERIMENTAL');
427 $channel = $chan_adaptor->fetch_by_type_experimental_chip_id($type, $echip->dbID());
428
429 if($channel){
430 if(! $self->recovery()){
431 throw("Channel(".$echip->unqiue_id().":".uc($data[$hpos{'PROMOT_SAMPLE_TYPE'}]).
432 " already exists in the database\nMaybe you want to recover?");
433 }else{
434 #push @{$self->{'_rollback_ids'}}, $channel->dbID();
435 #No point in doing this as all Channels mey be pre-registered in recovery mode
436 #Hence all will be rolled back
437 }
438 }else{
439 #Handles single/mutli
440
441 $channel = Bio::EnsEMBL::Funcgen::Channel->new
442 (
443 -EXPERIMENTAL_CHIP_ID => $echip->dbID(),
444 -DYE => $data[$hpos{'DYE'}],
445 -SAMPLE_ID => $sample_label,
446 -TYPE => $type,
447 );
448
449 #-SPECIES => $self->species(),#on channel/sample to enable multi-species chip/experiment
450 #would never happen on one chip? May happen between chips in one experiment
451
452 ($channel) = @{$chan_adaptor->store($channel)};
453
454 }
455
456 #we need to build the channel level tab2mage line here
457
458 #For each BR there will be two sample_labels, one for each channel
459 #These will be used across multiple chips.
460 #If two chips the same design ID and the same sample labels, then we have a technical replicate
461 #else if they have different sample labels then we have another biological replicate
462 #We have a problem of associating channels to the same BR with differing sample labels
463 #This is solved by checking whether the chip ID has already been registered in a BR
464
465 #This fails if more than one sample label is used for any given BR
466 #This will result in the BR being split into the number of unique sample label pairs(Experimental/Control channel)
467 #This also fails if the same sample label has been used for two different BRs
468
469 if($self->{'write_mage'}){
470 #my $sample_name = ($sample_label eq '') ? '???' : substr($sample_label, 0, (length($sample_label)-1));
471 my $ctype_name = (defined $self->cell_type()) ? $self->cell_type->name() : '???';
472 my $ftype_name = (defined $self->feature_type()) ? $self->feature_type->name() : '???';
473 my $ctype_desc = (defined $self->cell_type()) ? $self->cell_type->description() : '???';
474
475 #define reps
476
477
478 #we need one to get the biorep based on the sample label and making sure the unique ID are the same
479 #we need to define the tech rep by matching the sample label and the making sure the design_id isn't already used
480
481 #Is this doing the BR assignment properly?
482
483 if(exists $sample_reps{$sample_label}){#Found chip in a previously seen BR
484 #Register the BR of this chip ID
485 $uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'} = $sample_reps{$sample_label};
486
487 }
488 elsif(exists $uid_reps{$data[$hpos{'CHIP_ID'}]}){#Found the other channel
489 $sample_reps{$sample_label} = $uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'};
490 }
491 else{#assign new br
492 $sample_reps{$sample_label} = $br_cnt; #Assign BR to sample label
493 $uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'} = $br_cnt; #Assign BR to chip id
494 $br_cnt++;
495 }
496
497
498
499 #Something is going awry here. The TR is not being reset for some new BRs
500
501 if(! exists $uid_reps{$data[$hpos{'CHIP_ID'}]}{'tr'}){
502 #we only assign a new tr here if this design has not been seen in any of the reps
503 #i.e. we need to get the first tr which does not contain this design_id
504
505 my $create_rep = 1;
506 my $tr;
507 my @chip_ids;
508 my $br = $uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'};
509
510 foreach my $chip_id(keys %uid_reps){
511
512 push @chip_ids, $chip_id if($uid_reps{$chip_id}{'br'} == $br);
513 }
514
515 #This is looping through all the TRs for all the design IDs
516
517 foreach my $rep(sort keys %did_reps){
518 #Doesn't exist for the given BR?
519 #So we need to get all the chip_ids for a given br
520 #Check wether it exists and wether it exists in did_reps and check wether the chip_id value is part of the BR set
521 #else we add it
522
523 if(! exists $did_reps{$rep}{$data[$hpos{'DESIGN_ID'}]}){
524 #Not seen in a TR of this $rep yet
525 $create_rep = 0;
526 }elsif(! grep(/$did_reps{$rep}{$data[$hpos{'DESIGN_ID'}]}/, @chip_ids)){
527 #Not seen in this BR with this TR $rep
528 $create_rep = 0;
529 }
530
531 if(! $create_rep){
532 #Design ID not seen so add to this TR
533 $did_reps{$rep}{$data[$hpos{'DESIGN_ID'}]} = $data[$hpos{'CHIP_ID'}]; #don't really need to assign this
534 $tr = $rep;
535 last;#do not remove this or we get wierd TR incrementing
536 }
537 }
538
539
540 if($create_rep){
541 #Get the next TR value for this given BR
542 my @trs;
543
544 foreach my $rep(keys %did_reps){
545
546 foreach my $chip_id(values %{$did_reps{$rep}}){
547
548 #Push TR if chip_id is present in this BR
549 push @trs, $rep if(grep(/$chip_id/, @chip_ids));
550 }
551 }
552 ($tr) = sort {$b<=>$a} @trs;
553
554 $tr ||=0;
555 $tr++;
556
557 #register design ID to chip ID mapping for this TR
558 $did_reps{$tr}{$data[$hpos{'DESIGN_ID'}]} = $data[$hpos{'CHIP_ID'}];
559 }
560
561 #register TR for this chip ID
562 $uid_reps{$data[$hpos{'CHIP_ID'}]}{'tr'} = $tr;
563 }
564
565 my $br = $self->experiment->name().'_BR'. $uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'};
566 my $tr = $br.'_TR'.$uid_reps{$data[$hpos{'CHIP_ID'}]}{'tr'};
567
568
569 #File[raw]
570 my $tsm_line = $echip->unique_id().'_'.$self->get_config('dye_freqs')->{$data[$hpos{'DYE'}]}.'_pair.txt';
571 #Array[accession] # Should this be left blank for AE accession?
572 $tsm_line .= "\t".$data[$hpos{'DESIGN_ID'}];
573 #Array[serial]
574 $tsm_line .= "\t".$echip->unique_id();
575
576 #Protocol(s)[grow][treatment][extraction][labelling][hybridisation][scanning]
577 foreach my $protocol(sort (keys %{$self->get_config('protocols')})){
578 $tsm_line .= "\t".$self->get_config('protocols')->{$protocol}->{'accession'};
579 }
580
581
582 #BioSource
583 $tsm_line .= "\t$ctype_name";
584 #Sample
585 $tsm_line .= "\t$br";
586 #Extract
587 $tsm_line .= "\t$tr";
588
589 #LabeledExtract & Immunoprecipitate
590 if($type eq 'EXPERIMENTAL'){
591 $tsm_line .= "\t$sample_label - IP of $tr with anti $ftype_name (Ab vendor, Ab ID)";
592 $tsm_line .= "\t$tr IP";
593 }else{
594 $tsm_line .= "\t$sample_label - Input control DNA of $tr\t";
595 }
596
597 #Hybridization
598 #U2OS BR1_TR1 ChIP H3KAc 46092 hyb
599 $tsm_line .= "\t$ctype_name $tr ChIP $ftype_name ".$echip->unique_id().' hyb';
600
601 #BioSourceMaterial SampleMaterial ExtractMaterial LabeledExtractMaterial
602 $tsm_line .= "\tcell\tgenomic_DNA\tgenomic_DNA\tsynthetic_DNA";
603
604 #Dye
605 $tsm_line .= "\t".$data[$hpos{'DYE'}];
606
607 #BioMaterialCharacteristics[Organism]
608 $tsm_line .= "\t".$self->species();
609
610 #BioMaterialCharacteristics[BioSourceType]
611 $tsm_line .= "\tfrozen_sample";
612
613 #BioMaterialCharacteristics[StrainOrLine]
614 $tsm_line .= "\t$ctype_name";
615
616 #BioMaterialCharacteristics[CellType]
617 $tsm_line .= "\t$ctype_name";
618
619 #BioMaterialCharacteristics[Sex]
620 $tsm_line .= "\t???";
621 #FactorValue[StrainOrLine]
622 $tsm_line .= "\t$ctype_name";
623 #FactorValue[Immunoprecipitate]
624 $tsm_line .= ($type eq 'EXPERIMENTAL') ? "\tanti-${ftype_name} antibody\n" : "\t\n";
625
626 print $t2m_file $tsm_line;
627
628 }
629
630 }
631
632 close($t2m_file) if $self->{'write_mage'};
633 close($fh);
634
635 return;
636 }
637
638
639
640
641 =head2 read_probe_data
642
643 Example : $imp->read_probe_data();
644 Description: Parses and imports probes, probe sets and features of a given array
645 No duplicate handling or probe caching is performed due to memory
646 issues, this is done in resolve_probe_data.
647 Returntype : none
648 Exceptions : none
649 Caller : Importer
650 Status : Medium
651
652 =cut
653
654
655 #Assumes one chip_design per experimental set.
656 sub read_probe_data{
657 my ($self) = shift;
658
659 my ($fh, $line, @data, %hpos, %probe_pos);#, %duplicate_probes);
660 $self->log("Parsing and importing ".$self->vendor()." probe data (".localtime().")", 1);
661
662 ### Read in
663 #ndf file: probe_set, probe and probe_feature(.err contains multiple mappings)
664 #pos file: probe chromosome locations
665
666 #Need to change how probe_names are generated for nimblegen?
667 #native probe_ids may not be unique, but should be when combined with the seq_id which is currently being used as the xref_id
668 #Handle with API!!
669
670 #READ REGION POSITIONS
671 #We need to handle different coord systems and possibly different assmemblies
672 my $slice_a = $self->db->get_SliceAdaptor();
673 my $cs = $self->db->get_FGCoordSystemAdaptor()->fetch_by_name('chromosome');
674
675
676 #TIED FILE CACHE!!!
677 #We need to rebuild the cache from the DB before we start adding new probe info
678 #We only need to rebuild cache if we find a chip that hasn't been imported?
679 #No, we just need to import without cache, then re-do the resolve step
680 #Are we still going to get disconnects when we dump the cache?
681
682
683 #warn "Read probe data should only read in the array chips which are specified by the ExperimentalChip? Not just what is present in the DesignNotes file?";
684
685
686 foreach my $array(@{$self->arrays()}){
687
688 foreach my $achip(@{$array->get_ArrayChips()}){
689
690 my (@log, %probe_pos, $fasta_file, $f_out);
691 #do we need to fetch probe by seq and array?
692 #this would also id non-unique seqs in design
693
694 #warn "We need to account for different cs feature amppings here
695
696 if($achip->has_status('IMPORTED')){
697 $self->log("Skipping fully imported ArrayChip:\t".$achip->design_id());
698 next;
699 }elsif($self->recovery()){
700 $self->log("Rolling back ArrayChip:\t".$achip->design_id());
701 $self->rollback_ArrayChips([$achip]);
702 }
703
704 $self->log("Importing ArrayChip:".$achip->design_id());
705
706 #Always use pos file, ndf file cannot be guranteed to contain all location info
707 #pos file also gives a key to which probes should be considered 'EXPERIMENTAL'
708
709 #CACHE PROBE POSITIONS
710 $fh = open_file($self->get_dir("design")."/".$achip->name().".pos");
711
712 #don't % = map ! Takes a lot longer than a while ;)
713 while($line = <$fh>){
714 $line =~ s/\r*\n//o;#Not using last element
715 @data = split/\t/o, $line;
716
717 #SEQ_ID CHROMOSOME PROBE_ID POSITION COUNT
718 if ($. == 1){
719 %hpos = %{$self->set_header_hash(\@data, $self->get_config('pos_fields'))};
720 next;
721 }
722
723 #Skip probe if there is a duplicate
724 if(exists $probe_pos{$data[$hpos{'PROBE_ID'}]}){
725
726 if($data[$hpos{'CHROMOSOME'}] eq $probe_pos{$data[$hpos{'PROBE_ID'}]}->{chr} &&
727 ($data[$hpos{'POSITION'}]+1) eq $probe_pos{$data[$hpos{'PROBE_ID'}]}->{start}){
728 #log or warn here?
729
730 #Not matching probe length here
731
732 next;
733 #Do we need to skip this in the ndf file too?
734
735 }
736 else{
737 throw("Found duplicate mapping for ".$data[$hpos{'PROBE_ID'}].
738 " need implement duplicate logging/cleaning");
739 }
740 #need to build duplicate hash to clean elements from hash
741 # $duplicate_probes{$data[$hpos{'PROBE_ID'}]} = 1;
742 #next;
743 }
744
745
746 my $random = 0;
747
748 if(! $self->cache_slice($data[$hpos{'CHROMOSOME'}])){
749 push @log, "Skipping feature import for probe ".$data[$hpos{'PROBE_ID'}]." with non-standard region ".$data[$hpos{'CHROMOSOME'}];
750
751 #should we try and resolve the random chrs here?
752 #at least store probe/set/result and skip feature
753 #can we just import as chr with no start postition?
754
755 #if ($data[$hpos{'CHROMOSOME'}] =~ /_random/){
756
757 # if(! $self->cache_slice($data[$hpos{'CHROMOSOME'}])){
758 #push @log, "Skipping probe ".$data[$hpos{'PROBE_ID'}]." with non-standard region ".$data[$hpos{'CHROMOSOME'}];
759 #}else{
760 #we should really log this in a seprate file to avoid overloading the lgo file
761 # push @log, "Importing random probe ".$data[$hpos{'PROBE_ID'}]." on ".$data[$hpos{'CHROMOSOME'}]." omitting position";
762
763 #}
764
765 #}
766
767 }
768
769 #This is not handling probes with random chrs
770
771 $probe_pos{$data[$hpos{'PROBE_ID'}]} = {(
772 chr => $data[$hpos{'CHROMOSOME'}],
773 start => ($data[$hpos{'POSITION'}] +1),#default UCSC->Ensembl coord conversion
774 )};
775
776 }
777
778
779
780
781 #Remove duplicate probes
782
783 $self->log("Built position cache from : ".$achip->name().".pos", 1);
784 close($fh);
785
786 $self->log("Importing design probes from : ".$achip->name().".ndf");
787 #OPEN PROBE IN/OUT FILES
788 $fh = open_file($self->get_dir("design")."/".$achip->name().".ndf");
789 #Need to set these paths in each achip hash, file names could be tablename.chip_id.txt
790
791 #Need to add dbname/port/species/vendor to this path?
792 #.efg DropDatabase should also clean the fasta dumps and caches for a given DB
793
794 if($self->dump_fasta()){
795 $fasta_file = $self->get_dir('fastas').'/'.$achip->name().".fasta";
796 $self->backup_file($fasta_file);
797 $f_out = open_file($fasta_file, '>');
798 }
799
800
801 my ($length, $ops, $op, $of, %pfs);
802
803 #should define mapping_method arg to allows this to be set to LiftOver/EnsemblMap
804 my $anal = $self->db->get_AnalysisAdaptor()->fetch_by_logic_name("VendorMap");
805
806
807 my $strand = 0; #default for nimblegen, should be config hash?
808
809 #my $cig_line = "50M"; #default for nimblegen, should be config hash?
810 #probe length can change within design, should be built from length
811
812
813 my $fasta = "";
814
815 #$self->Timer()->mark("Starting probe loop");
816
817
818 #This is leaking about 30-60MB for each normal density chip?
819 #need Devel::Monitor here?
820
821
822 while($line = <$fh>){
823 $line =~ s/\r*\n//;
824 @data = split/\t/o, $line;
825 my $loc = "";
826 my $class = "EXPERIMENTAL";
827
828 #PROBE_DESIGN_ID CONTAINER DESIGN_NOTE SELECTION_CRITERIA SEQ_ID PROBE_SEQUENCE MISMATCH MATCH_INDEX FEATURE_ID ROW_NUM COL_NUM PROBE_CLASS PROBE_ID POSITION DESIGN_ID X Y
829 #2067_0025_0001 BLOCK1 0 chrX TTAGTTTAAAATAAACAAAAAGATACTCTCTGGTTATTAAATCAATTTCT 0 52822449 52822449 1 25 experimental chrXP10404896 10404896 2067 25 1
830
831 if ($. == 1){
832 %hpos = %{$self->set_header_hash(\@data, $self->get_config('ndf_fields'))};
833 next;
834 }
835
836
837 if (! exists $probe_pos{$data[$hpos{'PROBE_ID'}]}){
838 push @log, "Skipping non-experimental probe:\t".$data[$hpos{'PROBE_ID'}];
839 next;
840 }
841
842 #Which non-experimental probes might we want to store?
843 #if($data[$hpos{'CONTAINER'}] =~ /control/io){
844 # $class = "CONTROL";
845 #}
846 #elsif($data[$hpos{'CONTAINER'}] =~ /random/io){
847 # $class = "RANDOM";
848 #}
849 #elsif($data[$hpos{'PROBE_CLASS'}] !~ /experimental/io){
850 # $class = "OTHER";
851 #}
852 #elsif(! exists $probe_pos{$data[$hpos{'PROBE_ID'}]}){ #HACKY HACKY HACK HACK!! Needed for valid region retrival
853 # $class = "OTHER";
854 #}
855 #SPIKE INS?
856
857
858
859 #This assumes all probes in feature/probeset are next to each other!!!!!!!!!!
860
861
862 if($data[$hpos{'FEATURE_ID'}] != $data[$hpos{'MATCH_INDEX'}]){#Probe set data
863 #print "Generating new probeset:\tFeature id:\t".$data[$hpos{'FEATURE_ID'}]."\tmatchindex:\t".$data[$hpos{'MATCH_INDEX'}]."\n";
864
865 if($ops && ($data[$hpos{'FEATURE_ID'}] ne $ops->name())){
866 #THis is where we chose to update/validate
867 #Do we need to pass probes if they're already stored..may aswell to reduce mysql load?
868 #No point as we have to query anyway
869 $self->store_set_probes_features($achip->dbID(), \%pfs, $ops);
870 throw("ops still defined in caller") if defined $ops;
871 }
872
873 $ops = Bio::EnsEMBL::Funcgen::ProbeSet->new(
874 -NAME => $data[$hpos{'FEATURE_ID'}],
875 -SIZE => undef,
876 -FAMILY => $data[$hpos{'CONTAINER'}],
877 #xref_id => $data[$hpos{'SEQ_ID'}],#Need to populate xref table
878 );
879
880 #should we store straight away or build a probeset/probe/feature set, and then store and validate in turn?
881 #Store directly have separate method to validate and update?
882 #would need to check if one exists before storing anyway, else we could potentially duplicate the same probe/probeset from a different array
883 #remember for affy we need duplicate probe records with identical probe ids, probeset records unique across all arrays
884
885 undef %pfs
886 }
887 elsif($. > 2){#may have previous ops set, but next has no ops, or maybe just no ops's at all
888 $self->store_set_probes_features($achip->dbID(), \%pfs, $ops);
889 throw("ops still defined in caller") if defined $ops;
890 }
891
892
893 ###PROBES
894 #should we cat $xref_id to $probe_id here to generate unique id?
895 #would be messy to handle in the code, but would have to somewhere(in the retrieval code)
896
897 $length = length($data[$hpos{'PROBE_SEQUENCE'}]);
898 #$probe_string .= "\t${psid}\t".$data[$hpos{'PROBE_ID'}]."\t${length}\t$ac_id\t${class}\n";
899
900 $op = Bio::EnsEMBL::Funcgen::Probe->new(
901 -NAME => $data[$hpos{'PROBE_ID'}],
902 -LENGTH => $length,
903 -ARRAY => $array,
904 -ARRAY_CHIP_ID => $achip->dbID(),
905 -CLASS => $class,
906 );
907
908
909 %{$pfs{$data[$hpos{'PROBE_ID'}]}} = (
910 probe => $op,
911 features => [],
912 );
913
914
915 ###PROBE FEATURES
916 #How can we be certain that we have the same mapping in the DB?
917 #Put checks in here for build?
918 #Need to handle controls/randoms here
919 #won't have features but will have results!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
920 #The format of the pos file looks like it should have all the data required, but
921 # chromsome is missing, first undef :(
922 #Need to use $pos here instead of .pos file
923 #However have problems defining probe class, as not populated in test set
924 #derive from container! :(
925 #Ignore controls/random as they won't have a region
926 #Also need to handle multiple mappings?
927 #As results reference probes, no features, can have multiple features on different builds
928
929 #if($class eq "EXPERIMENTAL"){
930
931 #if(exists $regions{$data[$hpos{'SEQ_ID'}]}){
932 # $fid++;
933 # $pf_string .= "\t".$regions{$data[$hpos{'SEQ_ID'}]}{'seq_region_id'}."\t".$data[$hpos{'POSITION'}]."\t".
934 # ($data[$hpos{'POSITION'}] + $length)."\t${strand}\t".$regions{$data[$hpos{'SEQ_ID'}]}{'coord_system_id'}.
935 # "\t${pid}\t${anal_id}\t".$data[$hpos{'MISMATCH'}]."\t${cig_line}\n";
936 #$loc .= $regions{$data[$hpos{'SEQ_ID'}]}{'seq_region_id'}.":".$data[$hpos{'POSITION'}].
937 # "-".($data[$hpos{'POSITION'}] + $length).";" if ($self->{'_dump_fasta'});
938 # }
939 # else{
940 # die("No regions defined for ".$data[$hpos{'SEQ_ID'}]." ".$data[$hpos{'PROBE_ID'}].
941 #" with family ".$data[$hpos{'CONTAINER'}]);
942 # }
943
944
945
946
947 if ($self->dump_fasta()){
948 #(my $chr = $probe_pos{$data[$hpos{'PROBE_ID'}]}->{'chr'}) =~ s/chr//;
949
950 #$loc .= $chr.":".$probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'}."-".
951 # ($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'}+ $length).";";
952
953 #$fasta .= ">".$data[$hpos{'PROBE_ID'}]."\t".$data[$hpos{'CHROMOSOME'}].
954 # "\t$loc\n".$data[$hpos{'PROBE_SEQUENCE'}]."\n";
955
956
957
958 #filter controls/randoms? Or would it be sensible to see where they map
959 #wrap seq here?
960 #$fasta .= ">".$data[$hpos{'PROBE_ID'}]."\n".$data[$hpos{'PROBE_SEQUENCE'}]."\n";
961
962
963 #To use this for mapping, we really need the dbID nr fasta
964 #This can be generated after the import, or maybe during resolve?
965 #This is also currently done on a chip level, where as the cache is resolved at the array level
966 #We could simply cat the files before resolving the fasta file
967 #Need to do this otherwise we risk overwriting the fasta file with incomplete data.
968 #Can we validate sequence across probes with same name in this step?
969 #Just use probe name for now.
970
971 #We could cat and sort the fastas to make sure we have the same sequences
972 #Need to dump the design_id in the fasta header
973 #This would also reduce IO on the DB as identical probe will be consecutive, hence just one query to get the id.
974
975 #Changed th format an content of this to facilitate dbID nr fasta file generation and sequence validation
976
977 $fasta .= ">".$data[$hpos{'PROBE_ID'}]."\t".$achip->design_id."\n".$data[$hpos{'PROBE_SEQUENCE'}]."\n";
978
979 #Print fasta every 10000 lines
980 if(! ($. % 10000)){
981 print $f_out $fasta;
982 $fasta = '';
983 }
984 }
985
986
987 #Hack!!!!!! Still importing probe (and result?)
988 next if(! $self->cache_slice($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'chr'}));
989 #warn("Skipping non standard probe (".$data[$hpos{'PROBE_ID'}].") with location:\t$loc\n");
990
991
992 $of = Bio::EnsEMBL::Funcgen::ProbeFeature->new
993 (
994 -START => $probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'},
995 -END =>($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'} + $length),
996 -STRAND => $strand,
997 -SLICE => $self->cache_slice($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'chr'}),
998 -ANALYSIS => $anal,
999 -MISMATCHCOUNT => $data[$hpos{'MISMATCH'}],#Is this always 0 for import? remove from header hash?
1000 -PROBE => undef, #Need to update this in the store method
1001 );
1002
1003
1004 push @{$pfs{$data[$hpos{'PROBE_ID'}]}{'features'}}, $of;
1005
1006 }
1007
1008 #need to store last data here
1009 $self->store_set_probes_features($achip->dbID(), \%pfs, $ops);
1010 $self->log(join("\n", @log));
1011 $achip->adaptor->store_status("IMPORTED", $achip);
1012 $self->log("ArrayChip:\t".$achip->design_id()." has been IMPORTED");
1013
1014 if ($self->dump_fasta()){
1015 print $f_out $fasta;
1016 close($f_out);
1017 }
1018
1019 $self->log("Imported design from:\t".$achip->name().".ndf", 1);
1020
1021
1022
1023 #$self->{'_probe_cache'} = undef;#As we can't get Y and Y info from the DB, this is only possible as the results files contain X and Y info
1024 }
1025
1026 #Should we build hash of probe_names:probe_feature_ids here for results import
1027 #Should we dump this as a lookup file for easier recoverability
1028 #This is the biggest step in the import
1029 #Building the hash would be fastest but least recoverable
1030 #Would have to write recover statments for each step i.e. build the most recent data structure required for the next import step
1031 #Individual queries for each result would take ages
1032 #This is all assuming there are no random records in the table i.e. ID series is linear with no gaps.
1033
1034
1035 #Could throw a random validation check in every X entries?
1036 #This would only work for non-parallel imports
1037 #periodic import when hit new probe_set, but no new data printed
1038
1039 }
1040
1041
1042 $self->log("Finished parsing probe data");
1043 #Total probe_sets:\t$psid\n".
1044 # "Total probes:\t$pid\nTotal probe_features:\t$fid");
1045
1046
1047 $self->resolve_probe_data();
1048
1049 return;
1050 }
1051
1052
1053
1054
1055 =head2 read_and_import_results_data
1056
1057 Example : $imp->read_results_data();
1058 Description: Parses and dumps raw results to file
1059 Returntype : none
1060 Exceptions : none
1061 Caller : Importer
1062 Status : at risk
1063
1064 =cut
1065
1066
1067 sub read_and_import_results_data{
1068 my $self = shift;
1069
1070 $self->log("Parsing ".$self->vendor()." results");
1071 my (@header, @data, @design_ids, @lines);
1072 my ($fh, $pid, $line, $file);
1073 my $anal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name("RawValue");
1074 my $result_set = $self->get_import_ResultSet($anal, 'channel');
1075
1076
1077 if ($result_set) { #we have some new data to import
1078
1079 foreach my $echip (@{$self->experiment->get_ExperimentalChips()}) {
1080
1081 #if( ! $echip->has_status('IMPORTED')){
1082
1083 foreach my $chan (@{$echip->get_Channels()}) {
1084
1085 if ( ! $chan->has_status('IMPORTED')) {
1086
1087 #we need to set write_mage here
1088
1089 my $array = $echip->get_ArrayChip->get_Array();
1090
1091 $self->get_probe_cache_by_Array($array) || throw('Failed to get the probe cache handle for results import, resolve cache here?');
1092 my ($probe_elem, $score_elem, %hpos);
1093 my $cnt = 0;
1094 my $r_string = "";
1095 my $chan_name = $echip->unique_id()."_".$self->get_config('dye_freqs')->{$chan->dye()};
1096 my $cc_id = $result_set->get_chip_channel_id($chan->dbID());
1097
1098
1099 #if ($self->recovery()) {
1100 # $self->log("Rolling back results for channel:\t${chan_name}");
1101 # $self->db->rollback_results($cc_id);
1102 # }
1103
1104 #open/backup output
1105 my $out_file = $self->get_dir("raw")."/result.".$chan_name.".txt";
1106 $self->backup_file($out_file);
1107 my $r_out = open_file($out_file, '>');
1108
1109 (my $alt_chan_name = $chan_name) =~ s/\_/\_1h\_/;
1110 my $found = 0;
1111
1112 FILE: foreach my $name($chan_name, $alt_chan_name){
1113
1114 foreach my $suffix ("_pair.txt", ".pair", ".txt") {
1115
1116 $file = $self->get_dir("results")."/".$name.$suffix;
1117
1118 if (-f $file) {
1119 $found = 1;
1120 last FILE;
1121 }
1122 }
1123 }
1124
1125 throw("Could not find result file for Channel(${chan_name}) in ".$self->get_dir('results')) if ! $found;
1126
1127 #open/slurp input
1128 $self->log("Reading result for channel $chan_name:\t$file", 1);
1129 $fh = open_file($file);
1130 @lines = <$fh>;
1131 close($fh);
1132
1133
1134 ###PROCESS HEADER
1135
1136 foreach my $i (0..$#lines) {
1137
1138 if ($lines[$i] =~ /PROBE_ID/o) {
1139 $lines[$i] =~ s/\r*\n//o;
1140 @data = split/\t/o, $lines[$i];
1141
1142 %hpos = %{$self->set_header_hash(\@data, $self->get_config('result_fields'))};
1143
1144 #remove header
1145 splice @lines, $i, 1;
1146
1147 last; #finished processing header
1148 }
1149 }
1150
1151 #we need to sort the result files based on the unique key(name at present, should replace with seq at some point)
1152 @lines = sort {(split/\t/o, $a)[$hpos{'PROBE_ID'}] cmp (split/\t/o, $b)[$hpos{'PROBE_ID'}]} @lines;
1153
1154 $self->log('Parsing results', 1);
1155
1156 foreach $line(@lines) {
1157
1158 #can we preprocess effectively?
1159 next if $line =~ /^#/;
1160 next if $line =~ /NGS_CONTROLS/;
1161 next if $line =~ /V_CODE/;
1162 next if $line =~ /H_CODE/;
1163 next if $line =~ /RANDOM/;
1164
1165 $line =~ s/\r*\n//o;
1166 @data = split/\t/o, $line;
1167
1168 ###PROCESS HEADER
1169 #if ($line =~ /PROBE_ID/o){
1170 #
1171 # %hpos = %{$self->set_header_hash(\@data, $self->get_config('result_fields'))};
1172 # next;#finished processing header
1173 #}
1174
1175 ###PROCESS DATA
1176 #Is this string concat causing the slow down, would it befaster to use an array and print a join?
1177
1178 if ($pid = $self->get_probe_id_by_name_Array($data[$hpos{'PROBE_ID'}], $array)) {
1179 $cnt ++;
1180 $r_string .= '\N'."\t${pid}\t".$data[$hpos{'PM'}]."\t${cc_id}\t".$data[$hpos{'X'}]."\t".$data[$hpos{'Y'}]."\n";
1181 } else {
1182 warn "Found unfiltered non-experimental probe in input $data[$hpos{'PROBE_ID'}]";
1183 }
1184
1185 ###PRINT SOME RESULTS
1186 if ($cnt > 10000) {
1187 $cnt = 0;
1188 print $r_out $r_string;
1189 $r_string ="";
1190 #could we fork here and import in the background?
1191 }
1192
1193 }
1194 #PRINT/CLOSE Channel file
1195 print $r_out $r_string;
1196 close($r_out);
1197 $self->log("Finished parsing $chan_name result", 1);
1198
1199 #Import directly here to avoid having to reparse all results if we crash!!!!
1200 $self->log("Importing:\t$out_file");
1201 $self->db->load_table_data("result", $out_file);
1202 $self->log("Finished importing:\t$out_file", 1);
1203 $chan->adaptor->store_status('IMPORTED', $chan);
1204
1205
1206 }
1207 }
1208 }
1209 }
1210 else {
1211 $self->log("Skipping results parse and import");
1212 }
1213
1214 $self->log("Finished parsing and importing results");
1215
1216 return;
1217 }
1218
1219
1220
1221 1;