Mercurial > repos > mahtabm > ensembl
comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/Sanger.pm @ 0:1f6dce3d34e0
Uploaded
| author | mahtabm |
|---|---|
| date | Thu, 11 Apr 2013 02:01:53 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1f6dce3d34e0 |
|---|---|
| 1 # | |
| 2 # EnsEMBL module for Bio::EnsEMBL::Funcgen::Parsers::Sanger | |
| 3 # | |
| 4 | |
| 5 =head1 LICENSE | |
| 6 | |
| 7 Copyright (c) 1999-2011 The European Bioinformatics Institute and | |
| 8 Genome Research Limited. All rights reserved. | |
| 9 | |
| 10 This software is distributed under a modified Apache license. | |
| 11 For license details, please see | |
| 12 | |
| 13 http://www.ensembl.org/info/about/code_licence.html | |
| 14 | |
| 15 =head1 CONTACT | |
| 16 | |
| 17 Please email comments or questions to the public Ensembl | |
| 18 developers list at <ensembl-dev@ebi.ac.uk>. | |
| 19 | |
| 20 Questions may also be sent to the Ensembl help desk at | |
| 21 <helpdesk@ensembl.org>. | |
| 22 | |
| 23 =head1 NAME | |
| 24 | |
| 25 Bio::EnsEMBL::Funcgen::Parsers::Sanger | |
| 26 | |
| 27 =head1 SYNOPSIS | |
| 28 | |
| 29 my $parser_type = "Bio::EnsEMBL::Funcgen::Parsers::Sanger"; | |
| 30 push @INC, $parser_type; | |
| 31 my $imp = $class->SUPER::new(@_); | |
| 32 | |
| 33 | |
| 34 =head1 DESCRIPTION | |
| 35 | |
| 36 This is a definitions class which should not be instatiated directly, it | |
| 37 normally inherited from the Importer. Sanger contains meta data and methods | |
| 38 specific to Sanger PCR arrays to aid parsing and importing of experimental data. | |
| 39 | |
| 40 =cut | |
| 41 | |
| 42 package Bio::EnsEMBL::Funcgen::Parsers::Sanger; | |
| 43 | |
| 44 use Bio::EnsEMBL::Funcgen::Array; | |
| 45 use Bio::EnsEMBL::Funcgen::ProbeSet; | |
| 46 use Bio::EnsEMBL::Funcgen::Probe; | |
| 47 use Bio::EnsEMBL::Funcgen::ProbeFeature; | |
| 48 use Bio::EnsEMBL::Funcgen::FeatureType; | |
| 49 use Bio::EnsEMBL::Funcgen::ExperimentalChip; | |
| 50 use Bio::EnsEMBL::Funcgen::ArrayChip; | |
| 51 use Bio::EnsEMBL::Funcgen::Channel; | |
| 52 use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate ); | |
| 53 use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw(species_chr_num open_file); | |
| 54 use Bio::EnsEMBL::Funcgen::Utils::Helper; | |
| 55 use Bio::EnsEMBL::Utils::Argument qw( rearrange ); | |
| 56 use strict; | |
| 57 | |
| 58 use vars qw(@ISA); | |
| 59 @ISA = qw(Bio::EnsEMBL::Funcgen::Utils::Helper); | |
| 60 | |
| 61 =head2 new | |
| 62 | |
| 63 Example : my $self = $class->SUPER::new(@_); | |
| 64 Description: Constructor method for Sanger class | |
| 65 Returntype : Bio::EnsEMBL::Funcgen::Parsers::Sanger | |
| 66 Exceptions : throws if Experiment name not defined or if caller is not Importer | |
| 67 Caller : Bio::EnsEMBL::Funcgen::Importer | |
| 68 Status : at risk | |
| 69 | |
| 70 =cut | |
| 71 | |
| 72 | |
| 73 sub new{ | |
| 74 my $caller = shift; | |
| 75 | |
| 76 my $class = ref($caller) || $caller; | |
| 77 my $self = $class->SUPER::new(); | |
| 78 | |
| 79 throw("This is a skeleton class for Bio::EnsEMBL::Importer, should not be used directly") if(! $self->isa("Bio::EnsEMBL::Funcgen::Importer")); | |
| 80 | |
| 81 $self->{'config'} = {( | |
| 82 #order of these data arrays is important! | |
| 83 array_data => [], #["array_chip"], | |
| 84 probe_data => ["array_probe"], | |
| 85 results_data => ["and_import_result"], | |
| 86 #import_methods => [], | |
| 87 #data paths here? | |
| 88 norm_method => undef, | |
| 89 #is this disabling -input_dir override option? | |
| 90 )}; | |
| 91 | |
| 92 | |
| 93 | |
| 94 return $self; | |
| 95 } | |
| 96 | |
| 97 =head2 set_config | |
| 98 | |
| 99 Example : $imp->set_config(); | |
| 100 Description: Sets a attribute dependent variables | |
| 101 Returntype : none | |
| 102 Exceptions : None | |
| 103 Caller : Importer | |
| 104 Status : At risk | |
| 105 | |
| 106 =cut | |
| 107 | |
| 108 sub set_config{ | |
| 109 my ($self) = @_; | |
| 110 | |
| 111 #placeholder method for setting any attr dependant vars e.g. file paths etc. | |
| 112 | |
| 113 | |
| 114 return; | |
| 115 } | |
| 116 | |
| 117 | |
| 118 sub read_array_probe_data{ | |
| 119 my ($self, $array_file) = @_; | |
| 120 | |
| 121 warn("Remove hard coding for Sanger array import, and accomodate adf format"); | |
| 122 | |
| 123 | |
| 124 $array_file ||= $self->array_file(); | |
| 125 my ($line, $fh, @list, $array_file_format, $cmd); | |
| 126 my ($op, $of, $imported, $fimported, $fanal); | |
| 127 my $oa_adaptor = $self->db->get_ArrayAdaptor(); | |
| 128 my $op_adaptor = $self->db->get_ProbeAdaptor(); | |
| 129 my $of_adaptor = $self->db->get_ProbeFeatureAdaptor(); | |
| 130 my $ec_adaptor = $self->db->get_ExperimentalChipAdaptor(); | |
| 131 my $ac_adaptor = $self->db->get_ArrayChipAdaptor(); | |
| 132 my $slice_adaptor = $self->db->get_SliceAdaptor(); | |
| 133 my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name("SangerPCR")->dbID(); | |
| 134 #have LiftOver? Could then use liftover in pipeline to redo mappings | |
| 135 | |
| 136 #store now checks whether already stored and updates array chips accordingly | |
| 137 my $array = Bio::EnsEMBL::Funcgen::Array->new | |
| 138 ( | |
| 139 -NAME => $self->array_name(), | |
| 140 -FORMAT => uc($self->format()), | |
| 141 -VENDOR => uc($self->vendor()), | |
| 142 -TYPE => 'PCR', | |
| 143 -DESCRIPTION => "Sanger ENCODE PCR array 3.1.1", | |
| 144 ); | |
| 145 | |
| 146 ($array) = @{$oa_adaptor->store($array)}; | |
| 147 | |
| 148 #This is treating each array chip as a separate array, unless arrayset is defined | |
| 149 #AT present we have no way of differentiating between different array_chips on same array???!!! | |
| 150 #Need to add functionality afterwards to collate array_chips into single array | |
| 151 my $array_chip = Bio::EnsEMBL::Funcgen::ArrayChip->new( | |
| 152 -NAME => $array->name(), | |
| 153 -DESIGN_ID => $array->name(), | |
| 154 -ARRAY_ID =>$array->dbID(), | |
| 155 ); | |
| 156 | |
| 157 ($array_chip) = @{$ac_adaptor->store($array_chip)}; | |
| 158 $array->add_ArrayChip($array_chip); | |
| 159 $self->add_Array($array); | |
| 160 | |
| 161 | |
| 162 #we also need to test wether the array as been imported as well as the mappings | |
| 163 #THis needs to use coord_sys-id not schema_build!! Duplcaite entries for different schema_builds | |
| 164 #with same assembly | |
| 165 | |
| 166 my $dnadb_cs = $self->db->dnadb->get_CoordSystemAdaptor->fetch_by_name('chromosome'); | |
| 167 my $fg_cs = $self->db->get_FGCoordSystemAdaptor->validate_and_store_coord_system($dnadb_cs); | |
| 168 | |
| 169 | |
| 170 #This fails if we're pointing to an old DB during the release cycle. Will be fine if we manage to cs mapping dynamically | |
| 171 | |
| 172 | |
| 173 if ($array_chip->has_status('IMPORTED')) { | |
| 174 $imported = 1; | |
| 175 $self->log("Skipping ArrayChip probe import (".$array_chip->name().") already fully imported"); | |
| 176 | |
| 177 #need to build cache here, from file first else from DB???? | |
| 178 #This is required for feature only imports | |
| 179 #as we won't have the probe dbID available | |
| 180 | |
| 181 if(! $self->get_probe_cache_by_Array($array)){ | |
| 182 $self->get_probe_cache_by_Array($array, 1); | |
| 183 } | |
| 184 | |
| 185 | |
| 186 | |
| 187 } elsif ($self->recovery()) { | |
| 188 $self->log("Rolling back partially imported ArrayChip:\t".$array_chip->name()); | |
| 189 $self->db->rollback_ArrayChip([$array_chip]); #This should really remove all CS imports too? | |
| 190 } | |
| 191 | |
| 192 | |
| 193 #should never really have CS imports if not IMPORTED | |
| 194 #there is however the potential to trash a lot of data if we were to remove the CS importes by mistake | |
| 195 #do we need to check whether any other sets are using the data? | |
| 196 #we have to check for result using relevant cs_id and cc_id | |
| 197 #no removal of probes is the key thing here as nothing is dependent on the feature_ids | |
| 198 #get all result sets by array chip? or get all ExperimentalChips by array chip | |
| 199 #would have to be result set as we would find our own ecs. May find our own rset | |
| 200 | |
| 201 | |
| 202 throw('This needs updating'); | |
| 203 | |
| 204 if ($array_chip->has_status('IMPORTED_CS_'.$fg_cs->dbID())) { | |
| 205 $fimported = 1; | |
| 206 $self->log("Skipping ArrayChip feature import (".$array_chip->name().") already fully imported for ".$self->data_version()); | |
| 207 } elsif ($self->recovery()) { | |
| 208 $self->log("Rolling back partially imported ArrayChip features:\t".$array_chip->name()); | |
| 209 $self->db->rollback_ArrayChip_features($array_chip, $fg_cs); | |
| 210 } | |
| 211 | |
| 212 | |
| 213 #need to check whether already imported on specified schema_build | |
| 214 #check for appropriate file given format in input dir or take path | |
| 215 | |
| 216 #if (! $fimported) {#now need to do this irrespective of import status due to x y requirements | |
| 217 #need only do this once, i.e. if the cache isn't defined yet | |
| 218 #this is assuming cache will be built properly | |
| 219 #may cause problems if not cleaned up properly after use. | |
| 220 | |
| 221 #ignore xy requirements for now, these should be associated with results file | |
| 222 | |
| 223 | |
| 224 | |
| 225 #if (! defined $self->{'_probe_cache'}) { | |
| 226 if (! $fimported) { | |
| 227 | |
| 228 | |
| 229 | |
| 230 if (! $array_file) { | |
| 231 | |
| 232 if (! defined $self->get_dir('input')) { | |
| 233 throw("No input_dir defined, if you are running in a non Experiment context please use -array_file"); | |
| 234 } | |
| 235 | |
| 236 #hacky ..do better? | |
| 237 for my $suffix ("gff", "adf") { | |
| 238 $cmd = $self->get_dir('input')."/".$self->array_name()."*".$suffix; | |
| 239 @list = `ls $cmd 2>/dev/null`; | |
| 240 | |
| 241 if ((scalar(@list) == 1) && | |
| 242 ($list[0] !~ /No such file or directory/o)) { ###this is only printed to STDERR? | |
| 243 | |
| 244 if (! defined $array_file) { | |
| 245 $array_file = $list[0]; | |
| 246 } else { | |
| 247 throw("Found more than one array file : $array_file\t$list[0]\nSpecify one with -array_file"); | |
| 248 } | |
| 249 } | |
| 250 } | |
| 251 | |
| 252 throw("Cannot find array file. Specify one with -array_file") if (! defined $array_file); | |
| 253 } | |
| 254 | |
| 255 | |
| 256 if ($array_file =~ /gff/io) { | |
| 257 $array_file_format = "GFF"; | |
| 258 } elsif ($array_file =~ /adf/io) { | |
| 259 $array_file_format = "ADF"; | |
| 260 throw("Does not yet accomodate Sanger adf format"); | |
| 261 } else { | |
| 262 throw("Could not determine array file format: $array_file"); | |
| 263 } | |
| 264 | |
| 265 | |
| 266 #if (! $fimported) { | |
| 267 $fanal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name(($array_file_format eq "ADF") ? "VendorMap" : "LiftOver"); | |
| 268 #} | |
| 269 | |
| 270 $self->log("Parsing ".$self->vendor()." array data (".localtime().")"); | |
| 271 $fh = open_file($array_file); | |
| 272 my @lines = <$fh>; | |
| 273 close($fh); | |
| 274 | |
| 275 | |
| 276 | |
| 277 my ($chr, $start, $end, $strand, $pid);#, $x, $y, $meta_x, $meta_y, @xy); | |
| 278 | |
| 279 #avoid mutliple calls for same array | |
| 280 my $ac_dbid = $array->get_ArrayChip_by_design_id($array->name())->dbID(); | |
| 281 | |
| 282 #sort file to enable probe cache method for new feature imports | |
| 283 @lines = sort {(split/\t|\;/o, $a)[8] cmp (split/\t|\;/o, $b)[8]} @lines; | |
| 284 | |
| 285 #This is not sorting properly!! | |
| 286 | |
| 287 #my @tmp = map ((split/\t|\;/o, $_)[8], @lines); | |
| 288 #@tmp = sort @tmp; | |
| 289 | |
| 290 | |
| 291 #$self->log('Tmp sorted array is :\n'.join("\n", @tmp)."\n"); | |
| 292 | |
| 293 | |
| 294 | |
| 295 | |
| 296 foreach $line(@lines) { | |
| 297 $line =~ s/\r*\n//; | |
| 298 | |
| 299 #($chr, $start, $end, $ratio, $pid) = split/\t/o, $line; | |
| 300 #($chr, undef, undef, $start, $end, undef, $strand, undef, $pid, $x, $y, $meta_x, $meta_y) = split/\t|\;/o, $line; | |
| 301 ($chr, undef, undef, $start, $end, undef, $strand, undef, $pid) = split/\t|\;/o, $line; | |
| 302 | |
| 303 | |
| 304 if($self->ucsc_coords){ | |
| 305 $start += 1; | |
| 306 } | |
| 307 | |
| 308 | |
| 309 #$meta_x =~ s/META_X=//; | |
| 310 #$x =~ s/X=//; | |
| 311 #$x = $x + (($meta_x -1)*26); | |
| 312 #$meta_y =~ s/META_Y=//; | |
| 313 #$y =~ s/Y=//; | |
| 314 #$y = $y + (($meta_y -1)*25); | |
| 315 $pid =~ s/reporter_id=//o; | |
| 316 $chr =~ s/chr//; | |
| 317 $strand = ($strand eq "+") ? 0 : 1; | |
| 318 | |
| 319 #Hack!!!!!! This is still maintaining the probe entry (and result?) | |
| 320 if (! $self->cache_slice($chr)) { | |
| 321 warn("-- Skipping non standard probe (${pid}) with location:\t${chr}:${start}-${end}\n"); | |
| 322 next; | |
| 323 } | |
| 324 | |
| 325 | |
| 326 #need to parse dependant on file format | |
| 327 #also need to account for duplicate probes on grid | |
| 328 | |
| 329 #need to test for imprted here for rebuilding the probe_info cache | |
| 330 #this will result in always using first x y for the inital import (i.e. skip any probe already in cache) | |
| 331 #or using last x y for previosuly imported as we can't check the cache as it will already be there | |
| 332 #could check for x y | |
| 333 #should always check x y as this will also implicitly check if it is in the cache | |
| 334 | |
| 335 #if (! $self->get_probe_id_by_name($pid)) { #already present in cache | |
| 336 #if(! (@xy = @{$self->get_probe_x_y_by_name($pid)})){ | |
| 337 | |
| 338 #can we not use store_set_probes_features | |
| 339 #would have to add x y to probe, which is not logical as probe can have many x y's | |
| 340 #keep like this and just change cache_probe_info | |
| 341 | |
| 342 if (! $imported) { | |
| 343 #when we utilise array coords, we need to look up probe cache and store again with new coords | |
| 344 #we're currently storing duplicates i.e. different ids with for same probe | |
| 345 #when we should be storing two records for the same probe/id | |
| 346 #the criteria for this will be different for each vendor, may have to check container etc for NimbleGen | |
| 347 | |
| 348 $op = Bio::EnsEMBL::Funcgen::Probe->new( | |
| 349 -NAME => $pid, | |
| 350 -LENGTH => ($end - $start), | |
| 351 -ARRAY => $array, | |
| 352 -ARRAY_CHIP_ID => $ac_dbid, | |
| 353 -CLASS => 'EXPERIMENTAL', | |
| 354 ); | |
| 355 | |
| 356 ($op) = @{$op_adaptor->store($op)}; | |
| 357 #$self->cache_probe_info($pid, $op->dbID, $x, $y); | |
| 358 } else { | |
| 359 #update XY cache for previously imported array | |
| 360 #$self->cache_probe_info($pid, $self->get_probe_id_by_name($pid), $x, $y); | |
| 361 } | |
| 362 | |
| 363 #if (! $fimported) { | |
| 364 $of = Bio::EnsEMBL::Funcgen::ProbeFeature->new( | |
| 365 -START => $start, | |
| 366 -END => $end, | |
| 367 -STRAND => $strand, | |
| 368 -SLICE => $self->cache_slice($chr), | |
| 369 -ANALYSIS => $fanal, | |
| 370 -MISMATCHCOUNT => 0, | |
| 371 -PROBE_ID => ($imported) ? | |
| 372 $self->get_probe_id_by_name_Array($pid, $array) : $op->dbID(), | |
| 373 ); | |
| 374 | |
| 375 #get_probe_id will throw if not in cache, which means that we have an unimported probe | |
| 376 #for an ArrayChip which is flagged as imported, must have been omitted from the import deisgn | |
| 377 #probably a manual fix required. Can we log these and write an update/repair script. | |
| 378 | |
| 379 $of_adaptor->store($of); | |
| 380 #} | |
| 381 | |
| 382 #} else { | |
| 383 #warn("Sanger does not accomodate on plate duplicates yet, result are not linked to X Y coords, using first coords for probe if present in results for $pid\n");ĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦĦ | |
| 384 #} | |
| 385 } | |
| 386 | |
| 387 $array_chip->adaptor->set_status('IMPORTED_CS_'.$fg_cs->dbID(), $array_chip) if ! $fimported; | |
| 388 $self->log("ArrayChip:\t".$array_chip->design_id()." has been IMPORTED_CS_".$fg_cs->dbID()); | |
| 389 | |
| 390 } | |
| 391 | |
| 392 | |
| 393 | |
| 394 if (! $imported) { | |
| 395 $array_chip->adaptor->set_status('IMPORTED', $array_chip); | |
| 396 $self->log("ArrayChip:\t".$array_chip->design_id()." has been IMPORTED"); | |
| 397 $self->resolve_probe_data(); | |
| 398 } | |
| 399 | |
| 400 $self->log("Finished parsing ".$self->vendor()." array/probe data (".localtime().")"); | |
| 401 #warn("Finished parsing ".$self->vendor()." array/probe data (".localtime().")"); | |
| 402 | |
| 403 return; | |
| 404 } | |
| 405 | |
| 406 =head2 read_and_import_result_data | |
| 407 | |
| 408 Example : $imp->read_and_import_result_data(); | |
| 409 Description: Parses and imports result for the sanger PCR array platform | |
| 410 Returntype : none | |
| 411 Exceptions : none | |
| 412 Caller : Importer | |
| 413 Status : At risk | |
| 414 | |
| 415 =cut | |
| 416 | |
| 417 sub read_and_import_result_data{ | |
| 418 my $self = shift; | |
| 419 | |
| 420 #change this to read_gff_chip_results | |
| 421 #as opposed to gff channel results | |
| 422 #This should also use the default logic names for the Vendor, or take a user defined list | |
| 423 $self->log("Reading ".$self->vendor()." result data (".localtime().")"); | |
| 424 | |
| 425 my ($file, $chip_uid, $line, $echip); | |
| 426 my ($ratio, $pid, %chip_files, %roll_back); | |
| 427 my $of_adaptor = $self->db->get_ProbeFeatureAdaptor(); | |
| 428 my $ec_adaptor = $self->db->get_ExperimentalChipAdaptor(); | |
| 429 my $chan_adaptor = $self->db->get_ChannelAdaptor(); | |
| 430 my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name("SangerPCR"); | |
| 431 my $result_adaptor = $self->db->get_ResultSetAdaptor(); | |
| 432 #this is done to avoid having to self->array_name in loop, will make multiple array loop easier | |
| 433 my $array = ${$self->arrays()}[0]; | |
| 434 | |
| 435 #This works a little differently as we're not parsing a meta file | |
| 436 #so the echips haven't been added yet. | |
| 437 #This is treating each array chip as a separate array, unless arrayset is defined | |
| 438 #AT present we have no way of differentiating between different array_chips on same array???!!! | |
| 439 #Need to add functionality afterwards to collate array_chips into single array | |
| 440 | |
| 441 #First add the echips to the Experiment | |
| 442 | |
| 443 if (! @{$self->result_files()}) { | |
| 444 my $list = "ls ".$self->input_dir().'/[0-9]*-[0-9a-zA-Z]*\.all\.*'; | |
| 445 my @rfiles = `$list`; | |
| 446 $self->result_files(\@rfiles); | |
| 447 } | |
| 448 | |
| 449 | |
| 450 foreach $file(@{$self->result_files()}) { | |
| 451 chomp $file; | |
| 452 ($chip_uid = $file) =~ s/.*\///; | |
| 453 $chip_uid =~ s/\..*//; | |
| 454 | |
| 455 $self->log("Found SANGER results file for $chip_uid:\t$file"); | |
| 456 $chip_files{$chip_uid} = $file; | |
| 457 | |
| 458 | |
| 459 $echip = $ec_adaptor->fetch_by_unique_id_vendor($chip_uid, 'SANGER'); | |
| 460 | |
| 461 #this should throw if not recovery | |
| 462 #Nee to check Nimbelgen methods | |
| 463 | |
| 464 if ($echip) { | |
| 465 | |
| 466 if (! $self->recovery()) { | |
| 467 throw("ExperimentalChip(".$echip->unqiue_id().") already exists in the database\nMaybe you want to recover?"); | |
| 468 }else{ | |
| 469 #log pre-reg'd chips for rollback | |
| 470 $roll_back{$echip->dbID()} = 1; | |
| 471 } | |
| 472 } else { | |
| 473 | |
| 474 $echip = Bio::EnsEMBL::Funcgen::ExperimentalChip->new | |
| 475 ( | |
| 476 -EXPERIMENT_ID => $self->experiment->dbID(), | |
| 477 -ARRAY_CHIP_ID => $self->arrays->[0]->get_ArrayChip_by_design_id($array->name())->dbID(), | |
| 478 -UNIQUE_ID => $chip_uid, | |
| 479 ); | |
| 480 | |
| 481 ($echip) = @{$ec_adaptor->store($echip)}; | |
| 482 $self->experiment->add_ExperimentalChip($echip); #if we need a contains method in here , always add!! | |
| 483 } | |
| 484 | |
| 485 #do we need DUMMY entries any more? | |
| 486 | |
| 487 #sub this passing the echip? | |
| 488 foreach my $type ('DUMMY_TOTAL', 'DUMMY_EXPERIMENTAL') { | |
| 489 | |
| 490 my $channel = $chan_adaptor->fetch_by_type_experimental_chip_id($type, $echip->dbID()); | |
| 491 | |
| 492 if ($channel) { | |
| 493 if (! $self->recovery()) { | |
| 494 throw("Channel(".$echip->unique_id().":$type) already exists in the database\nMaybe you want to recover?"); | |
| 495 } | |
| 496 } else { | |
| 497 | |
| 498 $channel = Bio::EnsEMBL::Funcgen::Channel->new | |
| 499 ( | |
| 500 -EXPERIMENTAL_CHIP_ID => $echip->dbID(), | |
| 501 -TYPE => $type, | |
| 502 ); | |
| 503 | |
| 504 ($channel) = @{$chan_adaptor->store($channel)}; | |
| 505 } | |
| 506 } | |
| 507 } | |
| 508 | |
| 509 | |
| 510 | |
| 511 #Now get rset using experiment echips | |
| 512 my $rset = $self->get_import_ResultSet($analysis, 'experimental_chip'); | |
| 513 | |
| 514 if ($rset) { #we have some new data | |
| 515 | |
| 516 foreach my $echip (@{$self->experiment->get_ExperimentalChips()}) { | |
| 517 | |
| 518 if ($echip->has_status('IMPORTED_SangerPCR', $echip)) { | |
| 519 $self->log("ExperimentalChip(".$echip->unique_id().") has already been imported"); | |
| 520 } else { | |
| 521 | |
| 522 my $cc_id = $rset->get_chip_channel_id($echip->dbID()); | |
| 523 | |
| 524 if ($self->recovery() && $roll_back{$echip->dbID()}){ | |
| 525 $self->log("Rolling back results for ExperimentalChip:\t".$echip->unique_id()); | |
| 526 $self->rollback_results($cc_id); | |
| 527 } | |
| 528 | |
| 529 $self->log("Reading SANGER result file for ".$echip->unique_id().":\t".$chip_files{$echip->unique_id()}); | |
| 530 $self->get_probe_cache_by_Array($array) || throw('Failed to reset probe cache handle'); | |
| 531 my $fh = open_file($chip_files{$echip->unique_id()}); | |
| 532 my @lines = <$fh>; | |
| 533 close($fh); | |
| 534 | |
| 535 my $rfile_path = $self->get_dir("norm")."/result.SangerPCR.".$echip->unique_id().".txt"; | |
| 536 my $rfile = open_file($rfile_path, '>'); | |
| 537 my $r_string = ""; | |
| 538 | |
| 539 | |
| 540 @lines = sort {(split/\t|\:/o, $a)[5] cmp (split/\t|\:/o, $b)[5]} @lines; | |
| 541 | |
| 542 foreach my $line (@lines) { | |
| 543 $line =~ s/\r*\n//o; | |
| 544 | |
| 545 ($ratio, undef, $pid) = (split/\t|\:/o, $line)[3..5]; | |
| 546 $pid =~ s/.*://o; | |
| 547 | |
| 548 $ratio = '\N' if $ratio eq 'NA'; #NULL is still useful info to store in result | |
| 549 #my ($x, $y) = @{$self->get_probe_x_y_by_name($pid)}; | |
| 550 | |
| 551 #this is throwing away the encode region which could be used for the probeset/family? | |
| 552 $r_string .= '\N'."\t".$self->get_probe_id_by_name_Array($pid, $array)."\t${ratio}\t${cc_id}\t".'\N'."\t".'\N'."\n";#${x}\t${y}\n"; | |
| 553 } | |
| 554 | |
| 555 print $rfile $r_string; | |
| 556 close($rfile); | |
| 557 | |
| 558 $self->log("Importing:\t$rfile_path"); | |
| 559 $self->db->load_table_data("result", $rfile_path); | |
| 560 $self->log("Finished importing:\t$rfile_path"); | |
| 561 $echip->adaptor->set_status('IMPORTED_SangerPCR', $echip); | |
| 562 } | |
| 563 } | |
| 564 | |
| 565 | |
| 566 | |
| 567 } else { | |
| 568 $self->log("No new data, skipping result parse"); | |
| 569 } | |
| 570 | |
| 571 $self->log("Finished reading and importing ".$self->vendor()." result data (".localtime().")"); | |
| 572 return; | |
| 573 } | |
| 574 | |
| 575 | |
| 576 | |
| 577 1; |
