0
|
1 #
|
|
2 # EnsEMBL module for Bio::EnsEMBL::Funcgen::Parsers::Sanger
|
|
3 #
|
|
4
|
|
5 =head1 LICENSE
|
|
6
|
|
7 Copyright (c) 1999-2011 The European Bioinformatics Institute and
|
|
8 Genome Research Limited. All rights reserved.
|
|
9
|
|
10 This software is distributed under a modified Apache license.
|
|
11 For license details, please see
|
|
12
|
|
13 http://www.ensembl.org/info/about/code_licence.html
|
|
14
|
|
15 =head1 CONTACT
|
|
16
|
|
17 Please email comments or questions to the public Ensembl
|
|
18 developers list at <ensembl-dev@ebi.ac.uk>.
|
|
19
|
|
20 Questions may also be sent to the Ensembl help desk at
|
|
21 <helpdesk@ensembl.org>.
|
|
22
|
|
23 =head1 NAME
|
|
24
|
|
25 Bio::EnsEMBL::Funcgen::Parsers::Sanger
|
|
26
|
|
27 =head1 SYNOPSIS
|
|
28
|
|
29 my $parser_type = "Bio::EnsEMBL::Funcgen::Parsers::Sanger";
|
|
30 push @INC, $parser_type;
|
|
31 my $imp = $class->SUPER::new(@_);
|
|
32
|
|
33
|
|
34 =head1 DESCRIPTION
|
|
35
|
|
36 This is a definitions class which should not be instantiated directly; it is
|
|
37 normally inherited from the Importer. Sanger contains meta data and methods
|
|
38 specific to Sanger PCR arrays to aid parsing and importing of experimental data.
|
|
39
|
|
40 =cut
|
|
41
|
|
42 package Bio::EnsEMBL::Funcgen::Parsers::Sanger;
|
|
43
|
|
44 use Bio::EnsEMBL::Funcgen::Array;
|
|
45 use Bio::EnsEMBL::Funcgen::ProbeSet;
|
|
46 use Bio::EnsEMBL::Funcgen::Probe;
|
|
47 use Bio::EnsEMBL::Funcgen::ProbeFeature;
|
|
48 use Bio::EnsEMBL::Funcgen::FeatureType;
|
|
49 use Bio::EnsEMBL::Funcgen::ExperimentalChip;
|
|
50 use Bio::EnsEMBL::Funcgen::ArrayChip;
|
|
51 use Bio::EnsEMBL::Funcgen::Channel;
|
|
52 use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate );
|
|
53 use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw(species_chr_num open_file);
|
|
54 use Bio::EnsEMBL::Funcgen::Utils::Helper;
|
|
55 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
|
|
56 use strict;
|
|
57
|
|
58 use vars qw(@ISA);
|
|
59 @ISA = qw(Bio::EnsEMBL::Funcgen::Utils::Helper);
|
|
60
|
|
61 =head2 new
|
|
62
|
|
63 Example : my $self = $class->SUPER::new(@_);
|
|
64 Description: Constructor method for Sanger class
|
|
65 Returntype : Bio::EnsEMBL::Funcgen::Parsers::Sanger
|
|
66 Exceptions : throws if Experiment name not defined or if caller is not Importer
|
|
67 Caller : Bio::EnsEMBL::Funcgen::Importer
|
|
68 Status : at risk
|
|
69
|
|
70 =cut
|
|
71
|
|
72
|
|
# Constructor for the Sanger parser definitions class.
#
# Builds the object via the parent constructor (called with no arguments),
# refuses to continue unless the resulting object is-a
# Bio::EnsEMBL::Funcgen::Importer, and then installs the parser's 'config'
# hash on the object.
#
# Returns : the constructed object
# Throws  : if the object does not inherit from Bio::EnsEMBL::Funcgen::Importer
sub new {
  my ($caller) = @_;

  # Accept either a class name or an existing instance as the invocant.
  my $class = ref($caller) || $caller;
  my $self  = $class->SUPER::new();

  if (! $self->isa("Bio::EnsEMBL::Funcgen::Importer")) {
    throw("This is a skeleton class for Bio::EnsEMBL::Importer, should not be used directly");
  }

  # Order of these data arrays is important!
  # Each entry names a step; this file defines read_array_probe_data and
  # read_and_import_result_data, which correspond to the entries below.
  my %config = (
    array_data   => [],                      # ["array_chip"] is currently disabled
    probe_data   => ["array_probe"],
    results_data => ["and_import_result"],
    norm_method  => undef,
  );

  $self->{'config'} = \%config;

  return $self;
}
|
|
96
|
|
97 =head2 set_config
|
|
98
|
|
99 Example : $imp->set_config();
|
|
100 Description: Sets any attribute-dependent variables
|
|
101 Returntype : none
|
|
102 Exceptions : None
|
|
103 Caller : Importer
|
|
104 Status : At risk
|
|
105
|
|
106 =cut
|
|
107
|
|
# Placeholder hook for setting any attribute-dependent variables
# (e.g. file paths). It currently does nothing and returns nothing.
sub set_config {
  my $self = shift;

  return;
}
|
|
116
|
|
117
|
|
# read_array_probe_data
#
# Example    : $imp->read_array_probe_data($array_file);
# Description: Stores the Array and ArrayChip for this import, then (for a GFF
#              array file) stores one Probe and one ProbeFeature per line and
#              flags the ArrayChip as IMPORTED / IMPORTED_CS_<coord system dbID>.
# Arg [1]    : optional path to the array file. Defaults to $self->array_file();
#              if still unset, the input dir is searched for a single
#              <array_name>*gff or <array_name>*adf file.
# Returntype : none
# Exceptions : throws unconditionally with 'This needs updating' after the
#              probe import status check, so the feature import below that
#              point is currently unreachable.
#              Beyond that guard: throws if no input dir/array file can be
#              found, if more than one candidate file is found, if the file
#              format cannot be determined, or if the file is in ADF format.
sub read_array_probe_data {
  my ($self, $array_file) = @_;

  warn("Remove hard coding for Sanger array import, and accomodate adf format");

  $array_file ||= $self->array_file();
  my ($line, $fh, @list, $array_file_format, $cmd);
  my ($op, $of, $imported, $fimported, $fanal);
  my $oa_adaptor    = $self->db->get_ArrayAdaptor();
  my $op_adaptor    = $self->db->get_ProbeAdaptor();
  my $of_adaptor    = $self->db->get_ProbeFeatureAdaptor();
  my $ec_adaptor    = $self->db->get_ExperimentalChipAdaptor();  # not used below
  my $ac_adaptor    = $self->db->get_ArrayChipAdaptor();
  my $slice_adaptor = $self->db->get_SliceAdaptor();             # not used below
  # Not used below; kept so the lookup still happens exactly as before.
  my $analysis      = $self->db->get_AnalysisAdaptor->fetch_by_logic_name("SangerPCR")->dbID();
  # Have LiftOver? Could then use liftover in pipeline to redo mappings.

  # Store now checks whether already stored and updates array chips accordingly.
  my $array = Bio::EnsEMBL::Funcgen::Array->new
    (
     -NAME        => $self->array_name(),
     -FORMAT      => uc($self->format()),
     -VENDOR      => uc($self->vendor()),
     -TYPE        => 'PCR',
     -DESCRIPTION => "Sanger ENCODE PCR array 3.1.1",
    );

  ($array) = @{$oa_adaptor->store($array)};

  # This is treating each array chip as a separate array, unless arrayset is defined.
  # At present we have no way of differentiating between different array_chips on the same array.
  # Need to add functionality afterwards to collate array_chips into a single array.
  my $array_chip = Bio::EnsEMBL::Funcgen::ArrayChip->new(
    -NAME      => $array->name(),
    -DESIGN_ID => $array->name(),
    -ARRAY_ID  => $array->dbID(),
  );

  ($array_chip) = @{$ac_adaptor->store($array_chip)};
  $array->add_ArrayChip($array_chip);
  $self->add_Array($array);

  # We also need to test whether the array has been imported as well as the mappings.
  # This needs to use coord_sys-id not schema_build! Duplicate entries for different
  # schema_builds with the same assembly.
  my $dnadb_cs = $self->db->dnadb->get_CoordSystemAdaptor->fetch_by_name('chromosome');
  my $fg_cs    = $self->db->get_FGCoordSystemAdaptor->validate_and_store_coord_system($dnadb_cs);

  # This fails if we're pointing to an old DB during the release cycle.
  # Will be fine if we manage the cs mapping dynamically.

  if ($array_chip->has_status('IMPORTED')) {
    $imported = 1;
    $self->log("Skipping ArrayChip probe import (".$array_chip->name().") already fully imported");

    # Need to build the cache here, from file first else from DB?
    # This is required for feature-only imports, as we won't have the
    # probe dbID available.
    if (! $self->get_probe_cache_by_Array($array)) {
      $self->get_probe_cache_by_Array($array, 1);
    }
  } elsif ($self->recovery()) {
    $self->log("Rolling back partially imported ArrayChip:\t".$array_chip->name());
    $self->db->rollback_ArrayChip([$array_chip]); # This should really remove all CS imports too?
  }

  # Should never really have CS imports if not IMPORTED.
  # There is however the potential to trash a lot of data if we were to remove the CS imports by mistake.
  # Do we need to check whether any other sets are using the data?
  # We have to check for results using the relevant cs_id and cc_id.
  # No removal of probes is the key thing here, as nothing is dependent on the feature_ids.
  # Get all result sets by array chip? Or get all ExperimentalChips by array chip?
  # Would have to be result set as we would find our own ecs. May find our own rset.

  # Deliberate guard: everything below this line is unreachable until it is removed.
  throw('This needs updating');

  if ($array_chip->has_status('IMPORTED_CS_'.$fg_cs->dbID())) {
    $fimported = 1;
    $self->log("Skipping ArrayChip feature import (".$array_chip->name().") already fully imported for ".$self->data_version());
  } elsif ($self->recovery()) {
    $self->log("Rolling back partially imported ArrayChip features:\t".$array_chip->name());
    $self->db->rollback_ArrayChip_features($array_chip, $fg_cs);
  }

  # Need to check whether already imported on the specified schema_build.
  # Check for an appropriate file given the format in the input dir, or take the path.
  # Ignore xy requirements for now; these should be associated with the results file.

  if (! $fimported) {

    if (! $array_file) {

      if (! defined $self->get_dir('input')) {
        throw("No input_dir defined, if you are running in a non Experiment context please use -array_file");
      }

      # Hacky... do better?
      for my $suffix ("gff", "adf") {
        $cmd  = $self->get_dir('input')."/".$self->array_name()."*".$suffix;
        @list = `ls $cmd 2>/dev/null`;

        # Backticks return each line with its terminator; strip it so the
        # file name we keep (and report in messages) has no trailing newline.
        chomp(@list);

        if ((scalar(@list) == 1) &&
            ($list[0] !~ /No such file or directory/o)) { # this is only printed to STDERR?

          if (! defined $array_file) {
            $array_file = $list[0];
          } else {
            throw("Found more than one array file : $array_file\t$list[0]\nSpecify one with -array_file");
          }
        }
      }

      throw("Cannot find array file. Specify one with -array_file") if (! defined $array_file);
    }

    # The format is guessed from the file name.
    if ($array_file =~ /gff/io) {
      $array_file_format = "GFF";
    } elsif ($array_file =~ /adf/io) {
      $array_file_format = "ADF";
      throw("Does not yet accomodate Sanger adf format");
    } else {
      throw("Could not determine array file format: $array_file");
    }

    $fanal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name(($array_file_format eq "ADF") ? "VendorMap" : "LiftOver");

    $self->log("Parsing ".$self->vendor()." array data (".localtime().")");
    $fh = open_file($array_file);
    my @lines = <$fh>;
    close($fh);

    my ($chr, $start, $end, $strand, $pid);

    # Avoid multiple calls for the same array.
    my $ac_dbid = $array->get_ArrayChip_by_design_id($array->name())->dbID();

    # Sort the file on the 9th tab/semicolon-delimited field to enable the
    # probe cache method for new feature imports.
    # NOTE (from the original author): this is not sorting properly!
    @lines = sort {(split/\t|\;/o, $a)[8] cmp (split/\t|\;/o, $b)[8]} @lines;

    foreach $line (@lines) {
      $line =~ s/\r*\n//;

      # Fields are split on tab or semicolon; only these five are used.
      ($chr, undef, undef, $start, $end, undef, $strand, undef, $pid) = split/\t|\;/o, $line;

      if ($self->ucsc_coords) {
        $start += 1;
      }

      $pid    =~ s/reporter_id=//o;
      $chr    =~ s/chr//;
      $strand = ($strand eq "+") ? 0 : 1;

      # Hack! This is still maintaining the probe entry (and result?)
      if (! $self->cache_slice($chr)) {
        warn("-- Skipping non standard probe (${pid}) with location:\t${chr}:${start}-${end}\n");
        next;
      }

      # Need to parse dependent on file format.
      # Also need to account for duplicate probes on the grid.
      # Can we not use store_set_probes_features? Would have to add x y to the probe,
      # which is not logical as a probe can have many x y's.

      if (! $imported) {
        # When we utilise array coords, we need to look up the probe cache and store again with new coords.
        # We're currently storing duplicates, i.e. different ids for the same probe,
        # when we should be storing two records for the same probe/id.
        # The criteria for this will be different for each vendor.
        $op = Bio::EnsEMBL::Funcgen::Probe->new(
          -NAME          => $pid,
          -LENGTH        => ($end - $start),
          -ARRAY         => $array,
          -ARRAY_CHIP_ID => $ac_dbid,
          -CLASS         => 'EXPERIMENTAL',
        );

        ($op) = @{$op_adaptor->store($op)};
      } else {
        # XY cache update for a previously imported array is currently disabled.
      }

      # For an already imported ArrayChip the probe dbID comes from the probe
      # cache; otherwise it is the dbID of the Probe just stored.
      $of = Bio::EnsEMBL::Funcgen::ProbeFeature->new(
        -START         => $start,
        -END           => $end,
        -STRAND        => $strand,
        -SLICE         => $self->cache_slice($chr),
        -ANALYSIS      => $fanal,
        -MISMATCHCOUNT => 0,
        -PROBE_ID      => ($imported) ?
          $self->get_probe_id_by_name_Array($pid, $array) : $op->dbID(),
      );

      # Original author's note: get_probe_id will throw if not in cache, which
      # means that we have an unimported probe for an ArrayChip which is flagged
      # as imported; it must have been omitted from the import design.
      # Probably a manual fix required. Can we log these and write an update/repair script?

      $of_adaptor->store($of);
    }

    $array_chip->adaptor->set_status('IMPORTED_CS_'.$fg_cs->dbID(), $array_chip) if ! $fimported;
    $self->log("ArrayChip:\t".$array_chip->design_id()." has been IMPORTED_CS_".$fg_cs->dbID());
  }

  if (! $imported) {
    $array_chip->adaptor->set_status('IMPORTED', $array_chip);
    $self->log("ArrayChip:\t".$array_chip->design_id()." has been IMPORTED");
    $self->resolve_probe_data();
  }

  $self->log("Finished parsing ".$self->vendor()." array/probe data (".localtime().")");

  return;
}
|
|
405
|
|
406 =head2 read_and_import_result_data
|
|
407
|
|
408 Example : $imp->read_and_import_result_data();
|
|
409 Description: Parses and imports result for the sanger PCR array platform
|
|
410 Returntype : none
|
|
411 Exceptions : none
|
|
412 Caller : Importer
|
|
413 Status : At risk
|
|
414
|
|
415 =cut
|
|
416
|
|
# Parses and imports results for the Sanger PCR array platform.
#
# Steps, as implemented below:
#   1. Take the result files from $self->result_files(); if that list is
#      empty, list them from the input dir with `ls`.
#   2. For each file, derive the chip unique id from the file name (path and
#      everything from the first '.' removed), then fetch or store the
#      matching ExperimentalChip and its two DUMMY channels.
#   3. Get the import ResultSet and, for each ExperimentalChip of the
#      experiment not yet flagged IMPORTED_SangerPCR, write a tab-delimited
#      result file into the 'norm' dir, load it into the "result" table and
#      set the IMPORTED_SangerPCR status.
#
# Returntype : none
# Exceptions : throws if an ExperimentalChip or Channel already exists and
#              recovery is not set; throws if the probe cache cannot be obtained
sub read_and_import_result_data {
  my $self = shift;

  # Change this to read_gff_chip_results as opposed to gff channel results.
  # This should also use the default logic names for the Vendor, or take a user defined list.
  $self->log("Reading ".$self->vendor()." result data (".localtime().")");

  my ($file, $chip_uid, $line, $echip);
  my ($ratio, $pid, %chip_files, %roll_back);
  my $of_adaptor     = $self->db->get_ProbeFeatureAdaptor();   # not used below
  my $ec_adaptor     = $self->db->get_ExperimentalChipAdaptor();
  my $chan_adaptor   = $self->db->get_ChannelAdaptor();
  my $analysis       = $self->db->get_AnalysisAdaptor->fetch_by_logic_name("SangerPCR");
  my $result_adaptor = $self->db->get_ResultSetAdaptor();      # not used below
  # This is done to avoid having to call self->array_name in the loop;
  # will make a multiple array loop easier.
  my $array = ${$self->arrays()}[0];

  # This works a little differently as we're not parsing a meta file,
  # so the echips haven't been added yet.
  # This is treating each array chip as a separate array, unless arrayset is defined.
  # At present we have no way of differentiating between different array_chips on the same array.
  # Need to add functionality afterwards to collate array_chips into a single array.

  # First add the echips to the Experiment.
  if (! @{$self->result_files()}) {
    my $list   = "ls ".$self->input_dir().'/[0-9]*-[0-9a-zA-Z]*\.all\.*';
    my @rfiles = `$list`;
    $self->result_files(\@rfiles);
  }

  foreach $file (@{$self->result_files()}) {
    chomp $file;

    # Chip unique id = file base name up to the first '.'.
    ($chip_uid = $file) =~ s/.*\///;
    $chip_uid =~ s/\..*//;

    $self->log("Found SANGER results file for $chip_uid:\t$file");
    $chip_files{$chip_uid} = $file;

    $echip = $ec_adaptor->fetch_by_unique_id_vendor($chip_uid, 'SANGER');

    # This should throw if not recovery.
    # Need to check NimbleGen methods.
    if ($echip) {

      if (! $self->recovery()) {
        # Fixed: the method was misspelt 'unqiue_id', which made Perl die on
        # the missing method instead of raising this message.
        throw("ExperimentalChip(".$echip->unique_id().") already exists in the database\nMaybe you want to recover?");
      } else {
        # Log pre-registered chips for rollback.
        $roll_back{$echip->dbID()} = 1;
      }
    } else {

      $echip = Bio::EnsEMBL::Funcgen::ExperimentalChip->new
        (
         -EXPERIMENT_ID => $self->experiment->dbID(),
         -ARRAY_CHIP_ID => $self->arrays->[0]->get_ArrayChip_by_design_id($array->name())->dbID(),
         -UNIQUE_ID     => $chip_uid,
        );

      ($echip) = @{$ec_adaptor->store($echip)};
      $self->experiment->add_ExperimentalChip($echip); # if we need a contains method in here, always add!
    }

    # Do we need DUMMY entries any more?
    # Sub this, passing the echip?
    foreach my $type ('DUMMY_TOTAL', 'DUMMY_EXPERIMENTAL') {

      my $channel = $chan_adaptor->fetch_by_type_experimental_chip_id($type, $echip->dbID());

      if ($channel) {
        if (! $self->recovery()) {
          throw("Channel(".$echip->unique_id().":$type) already exists in the database\nMaybe you want to recover?");
        }
      } else {

        $channel = Bio::EnsEMBL::Funcgen::Channel->new
          (
           -EXPERIMENTAL_CHIP_ID => $echip->dbID(),
           -TYPE                 => $type,
          );

        ($channel) = @{$chan_adaptor->store($channel)};
      }
    }
  }

  # Now get the rset using the experiment echips.
  my $rset = $self->get_import_ResultSet($analysis, 'experimental_chip');

  if ($rset) { # we have some new data

    foreach my $echip (@{$self->experiment->get_ExperimentalChips()}) {

      if ($echip->has_status('IMPORTED_SangerPCR', $echip)) {
        $self->log("ExperimentalChip(".$echip->unique_id().") has already been imported");
      } else {

        my $cc_id = $rset->get_chip_channel_id($echip->dbID());

        # Only chips that already existed before this run are rolled back.
        if ($self->recovery() && $roll_back{$echip->dbID()}) {
          $self->log("Rolling back results for ExperimentalChip:\t".$echip->unique_id());
          $self->rollback_results($cc_id);
        }

        $self->log("Reading SANGER result file for ".$echip->unique_id().":\t".$chip_files{$echip->unique_id()});
        $self->get_probe_cache_by_Array($array) || throw('Failed to reset probe cache handle');
        my $fh    = open_file($chip_files{$echip->unique_id()});
        my @lines = <$fh>;
        close($fh);

        my $rfile_path = $self->get_dir("norm")."/result.SangerPCR.".$echip->unique_id().".txt";
        my $rfile      = open_file($rfile_path, '>');
        my $r_string   = "";

        # Sort on the 6th tab/colon-delimited field, which is read as the probe name below.
        @lines = sort {(split/\t|\:/o, $a)[5] cmp (split/\t|\:/o, $b)[5]} @lines;

        foreach my $line (@lines) {
          $line =~ s/\r*\n//o;

          ($ratio, undef, $pid) = (split/\t|\:/o, $line)[3..5];
          $pid =~ s/.*://o;

          $ratio = '\N' if $ratio eq 'NA'; # NULL is still useful info to store in result

          # This is throwing away the encode region which could be used for the probeset/family?
          # '\N' is written literally as the placeholder in the first and last two columns.
          $r_string .= '\N'."\t".$self->get_probe_id_by_name_Array($pid, $array)."\t${ratio}\t${cc_id}\t".'\N'."\t".'\N'."\n";
        }

        print $rfile $r_string;
        close($rfile);

        $self->log("Importing:\t$rfile_path");
        $self->db->load_table_data("result", $rfile_path);
        $self->log("Finished importing:\t$rfile_path");
        $echip->adaptor->set_status('IMPORTED_SangerPCR', $echip);
      }
    }
  } else {
    $self->log("No new data, skipping result parse");
  }

  $self->log("Finished reading and importing ".$self->vendor()." result data (".localtime().")");
  return;
}
|
|
574
|
|
575
|
|
576
|
|
577 1;
|