0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2011 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <ensembl-dev@ebi.ac.uk>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =head1 NAME
|
|
20
|
|
21 Bio::EnsEMBL::Funcgen::Parsers::MAGE
|
|
22
|
|
23 =head1 SYNOPSIS
|
|
24
|
|
25 my $imp = Bio::EnsEMBL::Funcgen::Importer->new(%params);
|
|
26 $imp->register_experiment();
|
|
27
|
|
28
|
|
29 =head1 DESCRIPTION
|
|
30
|
|
31 B<This module> is the base class for all MAGE type array importers (e.g. Nimblegen).
|
|
32
|
|
33 =cut
|
|
34
|
|
35 ################################################################################
|
|
36
|
|
37 package Bio::EnsEMBL::Funcgen::Parsers::MAGE;
|
|
38
|
|
39 use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw(get_date open_file run_system_cmd);
|
|
40 use Bio::EnsEMBL::Utils::Exception qw( throw deprecate );
|
|
41 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
|
|
42 use Bio::EnsEMBL::Funcgen::Utils::Helper;
|
|
43 use Bio::MAGE::XMLUtils;
|
|
44
|
|
45
|
|
46 use File::Path;
|
|
47 use strict;
|
|
48 use vars qw(@ISA);
|
|
49 @ISA = qw(Bio::EnsEMBL::Funcgen::Utils::Helper);
|
|
50
|
|
51
|
|
52
|
|
53 ################################################################################
|
|
54
|
|
55 =head2 new
|
|
56
|
|
57 Description : Constructor method
|
|
58
|
|
59 Arg [1] : hash containing optional attributes:
|
|
60
|
|
61 ReturnType : Bio::EnsEMBL::Funcgen::Parsers::MAGE
|
|
62 Example : my $Exp = Bio::EnsEMBL::Nimblegen->new(%params);
|
|
63 Exceptions : throws if mandatory params are not set or DB connect fails
|
|
64 Caller : General
|
|
65 Status : Medium - potential for %params names to change, remove %attrdata?
|
|
66
|
|
67 =cut
|
|
68
|
|
69 ################################################################################
|
|
70
|
|
# Constructor. Builds the object through the parent class constructor and
# then records the MAGE specific options on it.
#
# Named arguments (picked out of the constructor argument list by rearrange):
#   WRITE_MAGE - stored in $self->{'write_mage'}, defaults to 0
#   NO_MAGE    - stored in $self->{'no_mage'}, defaults to 0
#   VENDOR     - stored in $self->{'vendor'}
#
# Returns the new object.
# Throws if the object is not a Bio::EnsEMBL::Funcgen::Importer, or if both
# no_mage and write_mage end up being set.
sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;
  my $self   = $class->SUPER::new(@_);

  # This needs to be an Importer!
  if (! $self->isa("Bio::EnsEMBL::Funcgen::Importer")) {
    throw("This is base class for the experiment Bio::EnsEMBL::Funcgen::Parsers, needs to inherit from Bio::EnsEMBL::Funcgen::Importer");
  }

  # Are we not passing any Helper params?
  # Log file etc is set in the run script
  my @option_names = ('WRITE_MAGE', 'NO_MAGE', 'VENDOR');
  my ($write_mage, $no_mage, $vendor) = rearrange(\@option_names, @_);

  $self->{'write_mage'} = $write_mage || 0;
  $self->{'no_mage'}    = $no_mage    || 0;
  $self->{'vendor'}     = $vendor;

  # MAGE handling is switched off for every vendor except NIMBLEGEN
  unless ($self->vendor eq 'NIMBLEGEN') {
    $self->{'no_mage'} = 1;
    warn "Hardcoding no_mage for non-NIMBLEGEN imports";
  }

  # The two flags contradict each other, so refuse to continue with both set
  throw('-no_mage and -write_mage options are mutually exclusive, please select just one')
    if $self->{'write_mage'} && $self->{'no_mage'};

  return $self;
}
|
|
105
|
|
106 =head2 process_experiment_config
|
|
107
|
|
108 Example : $self->process_experiment_config();
|
|
109 Description: Initialises import by creating working directories
|
|
110 and by storing the Experiment
|
|
111 Returntype : none
|
|
112 Exceptions : warns and throws depending on recover and Experiment status
|
|
113 Caller : general
|
|
114 Status : at risk - merge with register experiment
|
|
115
|
|
116 =cut
|
|
117
|
|
118 #This is actually processing the tab2mage file & writing XML
|
|
119
|
|
# Works out which part of the MAGE import is being run and, when a tab2mage
# file is already present, converts it with tab2mage.pl.
#
# Nothing is done when $self->{'no_mage'} is set. Otherwise:
#  - if write_mage was requested, or neither the tab2mage file nor stored
#    MAGE XML for this experiment exists, $self->{'write_mage'} is set and
#    any existing tab2mage file is backed up;
#  - else, if the tab2mage file exists, the MAGE XML file is backed up,
#    tab2mage.pl is run on the tab2mage file and $self->{'recover'} is set.
#
# Returntype : none
# Exceptions : throws if the tab2mage.pl exit code is outside 0-254
sub process_experiment_config {
  my $self = shift;

  # Here, this is where we need to call the a Parser from Importer to do this
  # for only MAGE experiments. validate_import?

  # This is only used for the first test below.
  my $exp_adaptor = $self->db->get_ExperimentAdaptor();
  my $xml         = $exp_adaptor->fetch_mage_xml_by_experiment_name($self->name);

  # DO NOT CHANGE THIS LOGIC!
  # Write mage if we specify it, or if we have neither the final xml nor the
  # template. Recovery is turned on to stop exiting when previously stored
  # chips are found from the 'write_mage' run.
  # This does mean that if you import without running the write_mage step
  # you could potentially be overwriting someone else's experiment info.
  # No way of getting around this, need to make warning obvious, add to end
  # of log!!!
  # We always want to write and update xml and ResultSets if we're running
  # the 2nd stage of the import.
  # Why would we ever want to skip the validate process? Leave for now as
  # this is working as we want it, but propose to remove skip functionality.

  # No mage is still valid as we may want to just import an experiment
  # before receiving correct meta data. We can then rerun the import with
  # -write_mage to update the resultsets.
  return if $self->{'no_mage'};

  if ($self->{'write_mage'} || !( -f $self->get_config('tab2mage_file') || $xml)) {
    $self->{'write_mage'} = 1;
    $self->backup_file($self->get_config('tab2mage_file'));
  }
  elsif (-f $self->get_config('tab2mage_file')) { # Run Tab2Mage
    # There used to be an update_xml test before this branch. It was dropped
    # so that the xml is always updated when running the 2nd stage: we are
    # never likely to edit the xml directly, so we always want to validate
    # and update. The write_mage flag has to be removed by the user after
    # the tab2mage file has been written, otherwise we never get here.
    $self->backup_file($self->get_config('mage_xml_file'));

    my $cmd = 'tab2mage.pl -e '.$self->get_config('tab2mage_file').
      ' -k -t '.$self->get_dir('output').
      ' -c -d '.$self->get_dir('results');

    $self->log('Reading tab2mage file');

    # The no exit flag is passed because tab2mage.pl returns non-zero exit
    # codes which must not abort the import by themselves.
    my $t2m_exit_code = run_system_cmd($cmd, 1);
    warn "tab2mage exit code is $t2m_exit_code";

    # Only an exit code outside 0-254 counts as failure. The whole range
    # test has to be negated: unary ! binds tighter than &&, so negating
    # just the first comparison let codes of 255 and above through.
    # NOTE(review): 255 is presumably the code seen when tab2mage.pl dies.
    if (! (($t2m_exit_code > -1) && ($t2m_exit_code < 255))) {
      throw("tab2mage failed. Please check and correct:\t".$self->get_config('tab2mage_file')."\n...and try again");
    }

    $self->{'recover'} = 1;
  }

  return;
}
|
|
195
|
|
196 =head2 init_tab2mage_export
|
|
197
|
|
198 Example : $self->init_tab2mage_export;
|
|
199 Description: Writes the standard experiment section of the tab2mage file
|
|
200 Returntype : FileHandle
|
|
201 Exceptions : ???
|
|
202 Caller : general
|
|
203 Status : at risk
|
|
204
|
|
205 =cut
|
|
206
|
|
# Starts a new tab2mage file for this experiment.
#
# Any existing tab2mage file is backed up first. The file is then opened for
# writing and given an experiment section, a protocol section (one row per
# configured protocol, sorted by protocol key) and the header row of the
# hybridization section.
#
# Returntype : the open file handle, left open for the caller
sub init_tab2mage_export {
  my $self = shift;

  my $tab2mage_path = $self->get_config('tab2mage_file');
  $self->backup_file($tab2mage_path) if -f $tab2mage_path;

  my $out_fh = open_file($tab2mage_path, '>');

  # The domain is whatever follows the @ in the contact address
  my $domain = (split /@/, $self->contact())[1];

  # One entry per line of the experiment section. Fields marked ??? or left
  # empty are placeholders.
  my @experiment_lines = (
    "experiment section",
    "domain\t".$domain,
    "accession\t",
    "quality_control\tbiological_replicate",
    "experiment_design_type\tbinding_site_identification",
    "name\t".$self->name(),
    "release_date\t",
    "submission_date\t",
    "submitter\t???",
    "submitter_email\t???",
    "investigator\t???",
    "investigator_email\t???",
    "organization\t???",
    "address\t???",
    "publication_title\t",
    "authors\t",
    "journal\t",
    "volume\t",
    "issue\t",
    "pages\t",
    "year\t",
    "pubmed_id\t",
  );
  my $exp_section = join('', map { $_."\n" } @experiment_lines);

  my $protocol_section = "Protocol section\naccession\tname\ttext\tparameters\n";

  foreach my $protocol_key (sort keys %{$self->get_config('protocols')}) {
    my $protocol   = $self->get_config('protocols')->{$protocol_key};
    my $parameters = $protocol->{'parameters'};
    $parameters    = '' if ! defined $parameters;

    # Every row ends with a trailing tab before the newline
    $protocol_section .= join("\t",
                              $protocol->{'accession'},
                              $protocol->{'name'},
                              $protocol->{'text'},
                              $parameters,
                              "\n");
  }

  # The column order comes from hybridisation_fields, so that rows written
  # later can be printed in the same order
  my $hyb_header = "\nHybridization section\n".join("\t", @{$self->hybridisation_fields()});

  print {$out_fh} join("\n", $exp_section, $protocol_section, $hyb_header, '');

  return $out_fh;
}
|
|
244
|
|
245
|
|
246 #Move to MAGE package?
|
|
247
|
|
# Lists the column names of the tab2mage hybridization section, in order.
#
# The file and array columns come first, then one Protocol[...] column per
# key of the 'protocols' config (sorted), then the fixed material, dye,
# characteristic and factor value columns.
#
# Returntype : ARRAYREF of column name strings
sub hybridisation_fields {
  my $self = shift;

  my @array_columns = ('File[raw]', 'Array[accession]', 'Array[serial]');

  my @protocol_columns;
  foreach my $protocol_key (sort keys %{$self->get_config('protocols')}) {
    push @protocol_columns, sprintf('Protocol[%s]', $protocol_key);
  }

  my @material_columns = (
    'BioSource', 'Sample', 'Extract', 'LabeledExtract', 'Immunoprecipitate', 'Hybridization',
    'BioSourceMaterial', 'SampleMaterial', 'ExtractMaterial', 'LabeledExtractMaterial',
    'Dye',
  );

  my @characteristic_columns = (
    'BioMaterialCharacteristics[Organism]',
    'BioMaterialCharacteristics[BioSourceType]',
    'BioMaterialCharacteristics[StrainOrLine]',
    'BioMaterialCharacteristics[CellType]',
    'BioMaterialCharacteristics[Sex]',
  );

  my @factor_columns = ('FactorValue[StrainOrLine]', 'FactorValue[Immunoprecipitate]');

  return [@array_columns, @protocol_columns, @material_columns,
          @characteristic_columns, @factor_columns];
}
|
|
259
|
|
260
|
|
261
|
|
262 #=head2 register_experiment
|
|
263 #
|
|
264 # Example : $imp->register_experiment()
|
|
265 # Description: General control method, performs all data import and normalisations
|
|
266 # Arg [1] : optional - dnadb DBAdaptor
|
|
267 # Returntype : none
|
|
268 # Exceptions : throws if arg is not Bio::EnsEMBL::DBSQL::DBAdaptor
|
|
269 # Caller : general
|
|
270 # Status : Medium
|
|
271 #
|
|
272 #=cut
|
|
273
|
|
274 #write/validate_mage
|
|
275
|
|
276 sub write_validate_experiment_config{
|
|
277 my $self = shift;
|
|
278
|
|
279
|
|
280 if($self->{'write_mage'} || $self->{'no_mage'}){
|
|
281 $self->read_data("array");
|
|
282
|
|
283 if(! $self->{'no_mage'}){
|
|
284 $self->log("PLEASE CHECK AND EDIT AUTOGENERATED TAB2MAGE FILE:\t".$self->get_config('tab2mage_file'));
|
|
285 #we could make this print only if it was set by the user, not by the Importer
|
|
286 $self->log('REMEMBER TO REMOVE -write_mage FLAG BEFORE UPDATING');
|
|
287 exit;
|
|
288 }
|
|
289 }
|
|
290 elsif(! $self->{'no_mage'}){#This should be a no_channel flag, set dependent on import mode(gff_chip, gff_chan)
|
|
291 #Need to accomodate chip level imports in validate?
|
|
292
|
|
293 if (! $self->{'skip_validate'}){
|
|
294
|
|
295 $self->log("Validating mage file:\t".$self->get_config('mage_xml_file'));
|
|
296
|
|
297
|
|
298 #Updating ResultSets:
|
|
299 #Given that we might want to add a chip to an experiment we will also need to update the tab2MAGE
|
|
300 #mage_xml and ResultSets accordingly.
|
|
301
|
|
302 #This should happen if we specify update_xml
|
|
303 #Should recovery also always force update?
|
|
304 #Considering the two run modes, write tab2mage & validate and import
|
|
305 #There is a subtle difference between recovery and update mage_xml
|
|
306 #Do we always run in recovery mode for the validate&import step?
|
|
307 #Yes we do, so can't guarantee the this means we want to update.
|
|
308 #So we need to change update_xml to update to reflect the changed functionality on ResultSets
|
|
309
|
|
310 #If we run an update without on then chips will be loaded but xml and ResultSets will not be altered :(
|
|
311 #If we're running the 2nd stage we should always be updating the xml anyway!!!!
|
|
312 #As there is no reason to rerun the validate & import step without it.(unless we're debugging maybe)
|
|
313 #So why should we ever run without it?
|
|
314
|
|
315 #To update ResultSets we validate as normal and then update where appropriate
|
|
316 #What has precedence? Replicate name?
|
|
317 #Update echip types as appropriate
|
|
318 #What if this invalidates original rsets?
|
|
319 #Then list sets not covered for removal by script?
|
|
320
|
|
321
|
|
322
|
|
323 my (%echips, @log);
|
|
324 my $rset_adaptor = $self->db->get_ResultSetAdaptor;
|
|
325 my $chan_anal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name('RawValue');
|
|
326
|
|
327 #need to change this to default analysis
|
|
328 #There we issues with setting VSN_GLOG as an env var
|
|
329 #as this is tested for and the norm was skipped for some reason?
|
|
330 my $chip_anal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($self->norm_method());
|
|
331
|
|
332 #Try import sets like this first, so we know ther is new data
|
|
333 my $chan_rset = $self->get_import_ResultSet($chan_anal, 'channel');
|
|
334 my $rset = $self->get_import_ResultSet($chip_anal, 'experimental_chip');
|
|
335
|
|
336
|
|
337 #Else get them anyway and log
|
|
338 if(! $rset){
|
|
339
|
|
340 if($chan_rset){
|
|
341 $self->log('Identified partial Channel only import, updating MAGE-XML');
|
|
342 }
|
|
343 else{
|
|
344 ($chan_rset) = @{$rset_adaptor->fetch_all_by_name_Analysis($self->experiment->name.'_IMPORT', $chan_anal)};
|
|
345 #Don't need to test for >1 here as this has already been done in get_import_ResultSet
|
|
346 $self->log('All ExperimentalChips imported, updating MAGE-XML only');
|
|
347 }
|
|
348
|
|
349 ($rset) = @{$rset_adaptor->fetch_all_by_name_Analysis($self->experiment->name.'_IMPORT', $chip_anal)};
|
|
350 }
|
|
351
|
|
352
|
|
353 #This will never happen now due to the change tab2mage rules in init_experiment
|
|
354 #Remove?
|
|
355 if(! $rset){
|
|
356 throw('Cannot find ResultSet, are you trying to import a new experiment which already has a tab2mage file present? Try removing the file, or specifying the -write_mage flag to parse_and_import.pl');
|
|
357 }
|
|
358
|
|
359 if(! -l $self->get_dir('output').'/MAGE-ML.dtd'){
|
|
360 system('ln -s '.$ENV{'EFG_DATA'}.'/MAGE-ML.dtd '.$self->get_dir('output').'/MAGE-ML.dtd') == 0 ||
|
|
361 throw('Failed to link MAGE-ML.dtd');
|
|
362 }
|
|
363
|
|
364
|
|
365 $self->log('VALIDATING MAGE XML');
|
|
366 my $reader = Bio::MAGE::XML::Reader->new();
|
|
367 my $mage_xml ||= $self->get_config('mage_xml_file');
|
|
368 $self->{'mage'} = $reader->read($mage_xml);
|
|
369
|
|
370 #this should only ever return 1 for an import
|
|
371 foreach my $mage_exp(@{$self->{'mage'}->getExperiment_package->getExperiment_list()}){
|
|
372
|
|
373 if($mage_exp->getName() ne $self->name()){
|
|
374 $self->log('MAGE experiment name ('.$mage_exp->getName().') does not match import name ('.$self->name().')');
|
|
375 }
|
|
376
|
|
377 #add more experiment level validation here?
|
|
378
|
|
379 foreach my $assay (@{$mage_exp->getBioAssays()}){
|
|
380
|
|
381 if($assay->isa('Bio::MAGE::BioAssay::PhysicalBioAssay')){#channel
|
|
382 $self->log('Validating PhysicalBioAssay "'.$assay->getName()."'\n");#hyb name(this is the file name for measured assays
|
|
383
|
|
384 my $bioassc = $assay->getBioAssayCreation();#This is a Hybridisation
|
|
385 my $array = $bioassc->getArray();#this is an ArrayChip
|
|
386 my $design_id = $array->getArrayDesign->getIdentifier();
|
|
387 my $chip_uid = $array->getArrayIdentifier();
|
|
388
|
|
389
|
|
390 foreach my $echip(@{$rset->get_ExperimentalChips()}){
|
|
391
|
|
392
|
|
393 if($echip->unique_id() eq $chip_uid){
|
|
394 $self->log("Found ExperimentalChip:\t".$chip_uid);
|
|
395
|
|
396 if(! exists $echips{$chip_uid}){
|
|
397 $echips{$chip_uid} = {(
|
|
398 total_biorep => undef,
|
|
399 total_biotechrep => undef,
|
|
400 experimental_biorep => undef,
|
|
401 experimental_biotechrep => undef,
|
|
402 total_dye => undef,
|
|
403 experimental_dye => undef,
|
|
404 cell_type => undef,
|
|
405 feature_type => undef,
|
|
406 )};
|
|
407 }
|
|
408
|
|
409 #Validate ArrayChip
|
|
410 my ($achip) = @{$self->db->get_ArrayChipAdaptor->fetch_all_by_ExperimentalChips([$echip])};
|
|
411
|
|
412 if($achip->design_id() ne $design_id){
|
|
413 push @log, "ArrayDesign Identifier (${design_id}) does not match ArrayChip design ID (".
|
|
414 $achip->design_id().")\n\tSkipping channel and replicate validation";
|
|
415 #skip the channel/replicate validation here?
|
|
416 }
|
|
417 else { #validate channels and replicate names
|
|
418
|
|
419 foreach my $src_biomat (@{$bioassc->getSourceBioMaterialMeasurements()}) { #Channel materials(X1)?
|
|
420 my $biomat = $src_biomat->getBioMaterial(); #LabelledExtract (IP/Control)
|
|
421 #we could sub this passing $echip and biomat?
|
|
422 #messy to pass regexs and populate correct echip hash attrs
|
|
423 #also messy to populate log
|
|
424 #keeping nested loop also prevents further obfuscation
|
|
425 #do we need to do all the defined checks, or maybe just the first one?
|
|
426 #Then we can skip all following warning?
|
|
427
|
|
428 foreach my $treat (@{$biomat->getTreatments()}) {
|
|
429 #As there is effectively one more level of material extraction for the IP channel
|
|
430 #this loop will returns materials an iteration out of sync for each channel
|
|
431
|
|
432 foreach my $ssrc_biomat (@{$treat->getSourceBioMaterialMeasurements()}) { #Channel measurement(x1)
|
|
433 my $sbiomat = $ssrc_biomat->getBioMaterial();
|
|
434 #This will either be techrep name for control of IP name for experimental channel
|
|
435 #SOM0035_BR1_TR2 IP #Immunoprecicpitate
|
|
436 #SOM0035_BR1_TR2 #Extract
|
|
437
|
|
438 if ($sbiomat->getName() =~ /BR[0-9]+_TR[0-9]+$/o) { #Total
|
|
439
|
|
440 if (! defined $echips{$chip_uid}{'total_biotechrep'}) {
|
|
441 $echips{$chip_uid}{'total_biotechrep'} = $sbiomat->getName();
|
|
442 }
|
|
443 else{
|
|
444 push @log, "Found two TOTAL Channels on same chip with biotechreps:\t".$sbiomat->getName().
|
|
445 " and ".$echips{$chip_uid}{'total_biotechrep'};
|
|
446 }
|
|
447 }else{#Experimental
|
|
448
|
|
449 #get feature type from assay
|
|
450 my $fv_ref = $assay->getBioAssayFactorValues();
|
|
451 if(! defined $fv_ref){
|
|
452 throw('No FactorValues found, you must populate the "Immunoprecipitate" field. Maybe you forgot to specify -feature_type?');
|
|
453 }
|
|
454
|
|
455 my ($feature_type);
|
|
456
|
|
457 foreach my $fvalue(@{$fv_ref}){
|
|
458
|
|
459 if($fvalue->getValue()->getCategory() eq 'Immunoprecipitate'){
|
|
460 $feature_type = $fvalue->getName();
|
|
461 $feature_type =~ s/anti\s*-\s*//;
|
|
462 $feature_type =~ s/\s*antibody\s*//;
|
|
463 }
|
|
464 }
|
|
465 $echips{$chip_uid}{'feature_type'} = $feature_type;
|
|
466 }
|
|
467
|
|
468 foreach my $ttreat (@{$sbiomat->getTreatments()}) {
|
|
469
|
|
470 foreach my $tsrc_biomat (@{$ttreat->getSourceBioMaterialMeasurements()}) {
|
|
471 my $tbiomat = $tsrc_biomat->getBioMaterial();
|
|
472 #SOM0035_BR1_TR2 #Extract (exp)
|
|
473 #SOM0035_BR1 #Sample (total)
|
|
474
|
|
475 if ($tbiomat->getName() =~ /BR[0-9]+_TR[0-9]+$/o) { #experimental
|
|
476
|
|
477 if (! defined $echips{$chip_uid}{'experimental_biotechrep'}) {
|
|
478 $echips{$chip_uid}{'experimental_biotechrep'} = $tbiomat->getName();
|
|
479 }
|
|
480 else{
|
|
481 push @log, "Found two EXPERIMENTAL Channels on same chip with biotechreps:\t".$tbiomat->getName().
|
|
482 " and ".$echips{$chip_uid}{'experimental_biotechrep'};
|
|
483 }
|
|
484
|
|
485 my $dye = $biomat->getLabels()->[0]->getName();
|
|
486
|
|
487 foreach my $chan (@{$echip->get_Channels()}) {
|
|
488
|
|
489 if ($chan->type() eq 'EXPERIMENTAL') {
|
|
490
|
|
491 if (uc($dye) ne uc($chan->dye())) {
|
|
492 push @log, "EXPERIMENTAL channel dye mismatch:\tMAGE = ".uc($dye).' vs DB '.uc($chan->dye);
|
|
493 } else {
|
|
494 $echips{$chip_uid}{'experimental_dye'} = uc($dye);
|
|
495 }
|
|
496 }
|
|
497 }
|
|
498 }
|
|
499 else { #control
|
|
500
|
|
501 if (! defined $echips{$chip_uid}{'total_biorep'}) {
|
|
502 $echips{$chip_uid}{'total_biorep'} = $tbiomat->getName();
|
|
503 }
|
|
504 else{
|
|
505 push @log, "Found two TOTAL Channels on same chip with biotechreps:\t".$tbiomat->getName().
|
|
506 " and ".$echips{$chip_uid}{'total_biorep'};
|
|
507 }
|
|
508
|
|
509 my $dye = $biomat->getLabels()->[0]->getName();
|
|
510
|
|
511 foreach my $chan (@{$echip->get_Channels()}) {
|
|
512
|
|
513 if ($chan->type() eq 'TOTAL') {
|
|
514
|
|
515 if (uc($dye) ne uc($chan->dye())) {
|
|
516 push @log, "TOTAL channel dye mismatch:\tMAGE = ".uc($dye).' vs DB '.uc($chan->dye);
|
|
517 }
|
|
518 else {
|
|
519 $echips{$chip_uid}{'total_dye'} = uc($dye);
|
|
520 }
|
|
521 }
|
|
522 }
|
|
523 }
|
|
524 #could do one more iteration and get Source and FeatureType?
|
|
525 #we should really extend this, and then update the EC cell_type and feature_types
|
|
526 #these features might not be biotmats tho...need to check
|
|
527
|
|
528
|
|
529 foreach my $ftreat (@{$tbiomat->getTreatments()}) {
|
|
530
|
|
531 foreach my $fsrc_biomat (@{$ftreat->getSourceBioMaterialMeasurements()}) {
|
|
532 my $fbiomat = $fsrc_biomat->getBioMaterial();
|
|
533 #EXPERIMENTAL - biorep
|
|
534 #TOTAL - source/cell type
|
|
535 my $cell_type;
|
|
536
|
|
537 if($fbiomat->getName() =~ /BR[0-9]+$/o){#EXPERIMETNAL
|
|
538
|
|
539 if(! defined $echips{$chip_uid}{'experimental_biorep'}){
|
|
540 $echips{$chip_uid}{'experimental_biorep'} = $fbiomat->getName();
|
|
541 }
|
|
542 else{
|
|
543 push @log, "Found two Experimental Channels on same chip with bioreps:\t".$fbiomat->getName().
|
|
544 " and ".$echips{$chip_uid}{'experimental_biorep'};
|
|
545 }
|
|
546
|
|
547
|
|
548 #last treatment/measurement/biomat level should go here
|
|
549 #as TOTAL channel does not have another level and will fail
|
|
550 foreach my $xtreat (@{$fbiomat->getTreatments()}) {
|
|
551
|
|
552 foreach my $xsrc_biomat (@{$xtreat->getSourceBioMaterialMeasurements()}) {
|
|
553 my $xbiomat = $xsrc_biomat->getBioMaterial();
|
|
554
|
|
555 foreach my $char(@{$xbiomat->getCharacteristics()}){
|
|
556 $cell_type = $char->getValue() if($char->getCategory() eq 'CellType');
|
|
557 }
|
|
558 }
|
|
559 }
|
|
560
|
|
561 }else{#this should be BioSource
|
|
562 #which should have CellType as characteristic
|
|
563 #we could change tab2mage and have this as a factor value,
|
|
564 #but don't want to start messing with "standard" format
|
|
565
|
|
566 foreach my $char(@{$fbiomat->getCharacteristics()}){
|
|
567 $cell_type = $char->getValue() if($char->getCategory() eq 'CellType');
|
|
568 }
|
|
569 }
|
|
570
|
|
571 #can have cell_type validation here
|
|
572 if(! defined $echips{$chip_uid}{'cell_type'}){
|
|
573 $echips{$chip_uid}{'cell_type'} = $cell_type;
|
|
574 }
|
|
575 elsif( $echips{$chip_uid}{'cell_type'} ne $cell_type){
|
|
576 push @log, "Found Channels on same chip (${chip_uid}) with different cell types:\t".
|
|
577 $cell_type." and ".$echips{$chip_uid}{'cell_type'};
|
|
578 }
|
|
579 }
|
|
580 }
|
|
581 }
|
|
582 }
|
|
583 }
|
|
584 }
|
|
585 }
|
|
586 }
|
|
587 } #end of echip
|
|
588 } #end of foreach echip
|
|
589 } #end of physbioassay
|
|
590 } #end of foreach assay
|
|
591 } #end of foreach exp
|
|
592
|
|
593
|
|
594
|
|
595 #we should fail here with log before we update the result sets
|
|
596
|
|
597 #we need to build rep names
|
|
598 #we're currently using sample labels, in the tab2mage file
|
|
599 #altho' previous sets have been using exp name
|
|
600 #these have been manually patched afterwards
|
|
601
|
|
602 #More desirable to have exp name as rset name, but no way of doing BR validation
|
|
603 #based on sample label, if we don't have it in the tab2mage
|
|
604 #if we change it in the DB then we need to update the tab2mage
|
|
605
|
|
606 #no way to do this when generating tab2mage as the user hasn't yet defined the reps
|
|
607 #we could just make reps based on sample labels
|
|
608 #then we just assume that alterations made by the user are correct
|
|
609 #as we can no longer validate using sample labels
|
|
610 #can still validate using cell/feature type
|
|
611
|
|
612 #no longer need vendor specific validation as this will be done in tab2mage generation
|
|
613
|
|
614
|
|
615 #We need to validate reps here
|
|
616 #then update ec records as appropriate and then create rsets
|
|
617
|
|
618 my (%bio_reps, %tech_reps);
|
|
619 my $ct_adaptor = $self->db->get_CellTypeAdaptor();
|
|
620 my $ft_adaptor = $self->db->get_FeatureTypeAdaptor();
|
|
621
|
|
622 #select rs.*, ec.*, c.* from result_set rs, chip_channel cc, channel c, experimental_chip ec where rs.result_set_id=cc.result_set_id and cc.table_name='experimental_chip' and cc.table_id=ec.experimental_chip_id and cc.table_id=c.experimental_chip_id order by name;
|
|
623
|
|
624 foreach my $echip (@{$rset->get_ExperimentalChips()}) {
|
|
625
|
|
626 my ($biorep, $biotechrep);
|
|
627
|
|
628 if (! exists $echips{$echip->unique_id()}) {
|
|
629 push @log, "No MAGE entry found for ExperimentalChip:\t".$echip->unique_id();
|
|
630 }
|
|
631 else {
|
|
632
|
|
633 foreach my $chan_type('total', 'experimental'){
|
|
634
|
|
635 $biorep = $echips{$echip->unique_id()}{$chan_type.'_biorep'};
|
|
636 $biotechrep = $echips{$echip->unique_id()}{$chan_type.'_biotechrep'};
|
|
637
|
|
638 if (! defined $biotechrep) {
|
|
639 push @log, 'ExperimentalChip('.$echip->unique_id().') Extract field do not meet naming convention(SAMPLE_BRN_TRN)';
|
|
640 } #! defined biorep? will never occur at present
|
|
641 elsif ($biotechrep !~ /\Q$biorep\E/) {
|
|
642 push @log, "Found Extract(techrep) vs Sample(biorep) naming mismatch\t${biotechrep}\tvs\t$biorep";
|
|
643 }
|
|
644
|
|
645 if ( ! $echips{$echip->unique_id()}{$chan_type.'_dye'}) {
|
|
646 push @log, "No ".uc($chan_type)." channel found for ExperimentalChip:\t".$echip->unique_id();
|
|
647 }
|
|
648
|
|
649 }
|
|
650
|
|
651 #Is this is really implicit in the test above
|
|
652 if($echips{$echip->unique_id()}{'experimental_biorep'} ne $echips{$echip->unique_id()}{'total_biorep'}){
|
|
653 push @log, "Found biorep mismatch between channels of ExperimentalChip ".$echip->unique_id().":\n".
|
|
654 "\tEXPERIMENTAL\t".$echips{$echip->unique_id()}{'experimental_biorep'}."\tTOTAL\t".
|
|
655 $echips{$echip->unique_id()}{'total_biorep'};
|
|
656 }
|
|
657
|
|
658 #Is this is really implicit in the test above
|
|
659 if($echips{$echip->unique_id()}{'experimental_biotechrep'} ne $echips{$echip->unique_id()}{'total_biotechrep'}){
|
|
660 push @log, "Found biotechrep mismatch between channels of ExperimentalChip ".$echip->unique_id().":\n".
|
|
661 "\tEXPERIMENTAL\t".$echips{$echip->unique_id()}{'experimental_biotechrep'}."\tTOTAL\t".
|
|
662 $echips{$echip->unique_id()}{'total_biotechrep'};
|
|
663 }
|
|
664
|
|
665
|
|
666 }
|
|
667
|
|
668
|
|
669 #Now we need to validate ec has same feature/cell type as other ecs in this br
|
|
670 #this does not handle import sets which ARE allowed to have same name but different types
|
|
671
|
|
672 #warn "Processing ".$echip->unique_id()." $biorep $biotechrep";
|
|
673
|
|
674
|
|
675 if(exists $bio_reps{$biorep}){
|
|
676
|
|
677
|
|
678 if(! defined $bio_reps{$biorep}{'cell_type'}){
|
|
679 push @log, "Found undefined CellType for biorep $biorep";
|
|
680 }
|
|
681 elsif($bio_reps{$biorep}{'cell_type'}->name() ne $echips{$echip->unique_id()}{'cell_type'}){
|
|
682 push @log, "Found CellType mismatch between $biorep and ExperimentalChip ".$echip->unique_id();
|
|
683 }
|
|
684
|
|
685
|
|
686 if(! defined $bio_reps{$biorep}{'feature_type'}){
|
|
687 push @log, "Found undefined FeatureType for biorep $biorep";
|
|
688 }
|
|
689 elsif($bio_reps{$biorep}{'feature_type'}->name() ne $echips{$echip->unique_id()}{'feature_type'}){
|
|
690 push @log, "Found FeatureType mismatch between $biorep and ExperimentalChip ".$echip->unique_id();
|
|
691 }
|
|
692
|
|
693 #warn "$biorep exists with\t".$bio_reps{$biorep}{'cell_type'}->name().' '.$bio_reps{$biorep}{'feature_type'}->name();
|
|
694
|
|
695 #We need to set the tech rep here too!
|
|
696 #Do we need to validate this also, as above.
|
|
697 #This would be overkill due to the inherant nature of the TR to BR relationship
|
|
698
|
|
699 if(! exists $tech_reps{$biotechrep}){
|
|
700 $tech_reps{$biotechrep}{'cell_type'} = $bio_reps{$biorep}{'cell_type'};
|
|
701 $tech_reps{$biotechrep}{'feature_type'} = $bio_reps{$biorep}{'feature_type'};
|
|
702 }
|
|
703
|
|
704
|
|
705 }else{
|
|
706
|
|
707 #warn "Creating new BR $biorep and TR $biotechrep";
|
|
708
|
|
709 if(defined $echips{$echip->unique_id()}{'cell_type'}){
|
|
710
|
|
711 my $cell_type = $ct_adaptor->fetch_by_name($echips{$echip->unique_id()}{'cell_type'});
|
|
712
|
|
713 if(! defined $cell_type){
|
|
714 push @log, 'CellType '.$echips{$echip->unique_id()}{'cell_type'}.' does not exist in the database, please use the import_type.pl script';
|
|
715 }else{
|
|
716 $bio_reps{$biorep}{'cell_type'} = $cell_type;
|
|
717 $tech_reps{$biotechrep}{'cell_type'} = $cell_type;
|
|
718 # warn "Setting ".$echip->unique_id()." $biorep $biotechrep ".$cell_type->name;
|
|
719 }
|
|
720 }else{
|
|
721 warn "No CellType specified for ExperimentalChip:\t".$echip->unique_id()."\n";
|
|
722 }
|
|
723
|
|
724
|
|
725 if(defined $echips{$echip->unique_id()}{'feature_type'}){
|
|
726 my $feature_type = $ft_adaptor->fetch_by_name($echips{$echip->unique_id()}{'feature_type'});
|
|
727
|
|
728 if(! defined $feature_type){
|
|
729 push @log, 'FeatureType '.$echips{$echip->unique_id()}{'feature_type'}.' does not exist in the database, please use the import_type.pl script';
|
|
730 }
|
|
731 else{
|
|
732 $bio_reps{$biorep}{'feature_type'} = $feature_type;
|
|
733 $tech_reps{$biotechrep}{'feature_type'} = $feature_type;
|
|
734
|
|
735 #warn "Setting ".$echip->unique_id()." $biorep $biotechrep ".$feature_type->name;
|
|
736 }
|
|
737 }else{
|
|
738 warn "No FeatureType specified for ExperimentalChip:\t".$echip->unique_id()."\n";
|
|
739 }
|
|
740 }
|
|
741
|
|
742 push @{$tech_reps{$biotechrep}{'echips'}}, $echip->unique_id();
|
|
743 push @{$bio_reps{$biorep}{'echips'}}, $echip->unique_id();
|
|
744 }
|
|
745
|
|
746
|
|
747
|
|
748
|
|
749 if (@log) {
|
|
750 $self->log("MAGE VALIDATION REPORT\n::\t".join("\n::\t", @log));
|
|
751 throw("MAGE VALIDATION FAILED\nPlease correct tab2mage file and try again:\t".$self->get_config('tab2mage_file'));
|
|
752 } else {
|
|
753 $self->log('MAGE VALDIATION SUCCEEDED');
|
|
754 }
|
|
755
|
|
756
|
|
757 #we also need to build the tech rep results sets(not displayable)
|
|
758 #do we need to have result sets for each biorep too?
|
|
759 #update ExperimentalChip replicate info
|
|
760 my (%rsets);
|
|
761 my %types = (
|
|
762 feature => {},
|
|
763 cell => {},
|
|
764 );
|
|
765
|
|
766
|
|
767
|
|
768 #This needs to update and split the import/top level sets so they are of same types
|
|
769 #update ec type here as we have ec context
|
|
770 #careful not to update multiple times, just once for each ec
|
|
771
|
|
772 my $eca = $self->db->get_ExperimentalChipAdaptor();
|
|
773
|
|
774
|
|
775 foreach my $echip (@{$rset->get_ExperimentalChips()}) {
|
|
776 my ($cell_type, $feature_type);
|
|
777
|
|
778 #Set biorep info and rset
|
|
779 foreach my $biorep (keys %bio_reps){
|
|
780
|
|
781 foreach my $chip_uid(@{$bio_reps{$biorep}{'echips'}}){
|
|
782
|
|
783 if($chip_uid eq $echip->unique_id()){
|
|
784 $echip->biological_replicate($biorep);
|
|
785 $cell_type = $bio_reps{$biorep}{'cell_type'};
|
|
786 $feature_type = $bio_reps{$biorep}{'feature_type'};
|
|
787
|
|
788 if(! defined $rsets{$biorep}){
|
|
789
|
|
790 $rsets{$biorep} = Bio::EnsEMBL::Funcgen::ResultSet->new
|
|
791 (
|
|
792 -NAME => $biorep,#this may not be unique, prepend with exp name? Force method to use Experiment_and_name?
|
|
793 -ANALYSIS => $rset->analysis(),
|
|
794 -TABLE_NAME => 'experimental_chip',
|
|
795 -FEATURE_TYPE => $feature_type,
|
|
796 -CELL_TYPE => $cell_type,
|
|
797 );
|
|
798
|
|
799 #record cell and feature types
|
|
800 $types{'feature'}{$feature_type->name()} = $feature_type;
|
|
801 $types{'cell'}{$cell_type->name()} = $cell_type;
|
|
802 $self->log("Created BioRep ResultSet:\t".$rsets{$biorep}->log_label);
|
|
803 }
|
|
804
|
|
805 $rsets{$biorep}->add_table_id($echip->dbID(), $rset->get_chip_channel_id($echip->dbID()));
|
|
806 }
|
|
807 }
|
|
808 }
|
|
809
|
|
810 #reset echip types
|
|
811 $echip->feature_type($feature_type);
|
|
812 $echip->cell_type($cell_type);
|
|
813
|
|
814
|
|
815 #set tech rep info and rset
|
|
816 foreach my $techrep(keys %tech_reps){
|
|
817
|
|
818 foreach my $chip_uid(@{$tech_reps{$techrep}{'echips'}}){
|
|
819
|
|
820 if($chip_uid eq $echip->unique_id()){
|
|
821 $echip->technical_replicate($techrep);
|
|
822
|
|
823 if(! defined $rsets{$techrep}){
|
|
824 $rsets{$techrep} = Bio::EnsEMBL::Funcgen::ResultSet->new
|
|
825 (
|
|
826 -NAME => $techrep,#this may not be unique, prepend with exp name? Force method to use Experiment_and_name?
|
|
827 -ANALYSIS => $rset->analysis(),
|
|
828 -TABLE_NAME => 'experimental_chip',
|
|
829 -FEATURE_TYPE => $tech_reps{$techrep}{'feature_type'},
|
|
830 -CELL_TYPE => $tech_reps{$techrep}{'cell_type'},
|
|
831 );
|
|
832
|
|
833 $self->log("Created TechRep ResultSet:\t".$rsets{$techrep}->log_label);
|
|
834 }
|
|
835 $rsets{$techrep}->add_table_id($echip->dbID(), $rset->get_chip_channel_id($echip->dbID()));
|
|
836 }
|
|
837 }
|
|
838 }
|
|
839
|
|
840 $echip->adaptor->update_replicate_types($echip);#store rep info
|
|
841 }
|
|
842
|
|
843
|
|
844 ### Reset/Update/Clean import sets type fields
|
|
845 my $sql;
|
|
846
|
|
847 if(scalar keys %{$types{'feature'}} >1){
|
|
848 $self->log('Resetting IMPORT FeatureType to NULL for multi-FeatureType Experiment');
|
|
849 $sql = "UPDATE result_set set feature_type_id='NULL' where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';
|
|
850
|
|
851 }else{
|
|
852 my ($ftype) = values %{$types{'feature'}};
|
|
853
|
|
854 if(! defined $rset->feature_type()){
|
|
855 $self->log('Updating IMPORT FeatureType to '.$ftype->name());
|
|
856 $sql = "UPDATE result_set set feature_type_id=".$ftype->dbID()." where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';
|
|
857 }
|
|
858 elsif($rset->feature_type->dbID ne $ftype->dbID()){
|
|
859 $self->log('WARNING: FeatureType mismatch. Updating IMPORT FeatureType('.$rset->feature_type->name().') to match meta('.$ftype->name.')');
|
|
860 $sql = "UPDATE result_set set feature_type_id=".$ftype->dbID()." where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';
|
|
861
|
|
862 }
|
|
863 }
|
|
864
|
|
865 $self->db->dbc->do($sql) if $sql;
|
|
866
|
|
867 undef $sql;
|
|
868
|
|
869 if(scalar keys %{$types{'cell'}} >1){
|
|
870 $self->log('Resetting IMPORT CellType to NULL for multi-CellType Experiment');
|
|
871 my $sql = "UPDATE result_set set cell_type_id='NULL' where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';
|
|
872 }else{
|
|
873 my ($ctype) = values %{$types{'cell'}};
|
|
874
|
|
875 if(! defined $rset->cell_type()){
|
|
876 $self->log('Updating IMPORT CellType to '.$ctype->name());
|
|
877 $sql = "UPDATE result_set set cell_type_id=".$ctype->dbID()." where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';
|
|
878 }
|
|
879 elsif($rset->cell_type->dbID ne $ctype->dbID()){
|
|
880 $self->log('WARNING: CellType mismatch. Updating IMPORT CellType('.$rset->cell_type->name().') to match meta('.$ctype->name.')');
|
|
881 $sql = "UPDATE result_set set cell_type_id=".$ctype->dbID()." where result_set_id in (".$rset->dbID().', '.$chan_rset->dbID().')';
|
|
882 }
|
|
883 }
|
|
884
|
|
885 $self->db->dbc->do($sql) if $sql;
|
|
886
|
|
887 ### Generate new top level sets here based on br type combos
|
|
888 #we risk duplicating sets here if import set is set to one cell/featuretype
|
|
889 #duplicate anyway, as import is really just for easy tracking of all chips during import
|
|
890
|
|
891 my %toplevel_sets;
|
|
892 my $toplevel_cnt = 1;
|
|
893 #could tidy up toplevel_sets implementation
|
|
894
|
|
895 foreach my $new_rset(values %rsets){
|
|
896
|
|
897 my $ftype_name = (defined $new_rset->{'feature_type'}) ? $new_rset->{'feature_type'}->name() : undef;
|
|
898 my $ctype_name = (defined $new_rset->{'cell_type'}) ? $new_rset->{'cell_type'}->name() : undef;
|
|
899
|
|
900 if(! exists $toplevel_sets{$ftype_name}){
|
|
901 $toplevel_sets{$ftype_name} = {};
|
|
902 $toplevel_sets{$ftype_name}{'feature_type'} = $new_rset->{'feature_type'};
|
|
903 }
|
|
904
|
|
905
|
|
906
|
|
907 if(! exists $toplevel_sets{$ftype_name}{$ctype_name}){
|
|
908 $toplevel_sets{$ftype_name}{$ctype_name}{'cell_type'} = $new_rset->{'cell_type'};
|
|
909 $toplevel_sets{$ftype_name}{$ctype_name}{'rsets'} = [$new_rset];
|
|
910 }else{
|
|
911 push @{$toplevel_sets{$ftype_name}{$ctype_name}{'rsets'}}, $new_rset;
|
|
912 }
|
|
913 }
|
|
914
|
|
915
|
|
916
|
|
917 #build toplevel sets for each feature/cell type combo using constituent rsets
|
|
918 foreach my $ftype_name(keys %toplevel_sets){
|
|
919
|
|
920 foreach my $ctype_name(keys %{$toplevel_sets{$ftype_name}}){
|
|
921
|
|
922 next if $ctype_name eq 'feature_type';#skip feature type
|
|
923
|
|
924 #we need to give these a different key so we're not overwriting in the rset hash
|
|
925 $rsets{$self->experiment->name().'_'.$toplevel_cnt} = Bio::EnsEMBL::Funcgen::ResultSet->new
|
|
926 (
|
|
927 -NAME => $self->experiment->name(),
|
|
928 -ANALYSIS => $rset->analysis(),
|
|
929 -TABLE_NAME => 'experimental_chip',
|
|
930 -FEATURE_TYPE => $toplevel_sets{$ftype_name}{'feature_type'},
|
|
931 -CELL_TYPE => $toplevel_sets{$ftype_name}{$ctype_name}{'cell_type'},
|
|
932 );
|
|
933
|
|
934 $self->log("Created toplevel ResultSet for:\t". $rsets{$self->experiment->name().'_'.$toplevel_cnt}->log_label);
|
|
935
|
|
936 #add constituent table ids
|
|
937 foreach my $new_rset(@{$toplevel_sets{$ftype_name}{$ctype_name}{'rsets'}}){
|
|
938
|
|
939 foreach my $ec_id(@{$new_rset->table_ids()}){
|
|
940
|
|
941 #Only add it if it has not already been added
|
|
942 if(! $rsets{$self->experiment->name().'_'.$toplevel_cnt}->get_chip_channel_id($ec_id)){
|
|
943 $rsets{$self->experiment->name().'_'.$toplevel_cnt}->add_table_id($ec_id, $new_rset->get_chip_channel_id($ec_id));
|
|
944 }
|
|
945 }
|
|
946 }
|
|
947 $toplevel_cnt++;
|
|
948 }
|
|
949 }
|
|
950
|
|
951 #ResultSet update strategy
|
|
952 #To avoid messiness in resolving result_set differences
|
|
953 #Simply delete all that are not used as supporting sets
|
|
954 #and load new ones, log old supporting rsets for manual
|
|
955 #reassignment and rollback.
|
|
956 #If we have clash between an old set and a new set, rename old
|
|
957 #set and log
|
|
958 #We might not always have the previous data files.
|
|
959 #But we might want to maintain all the previous rsets and just add a new one
|
|
960 #At present this would require acquiring the previous Tab2Mage file
|
|
961 #and adding the new data to it.
|
|
962 #We could do with a way to merge data already in the DB with new meta data to form a new Tab2Mage file
|
|
963 #and validate that
|
|
964
|
|
965
|
|
966 my @previous_rep_sets;
|
|
967 my @supporting_rset_dsets;
|
|
968
|
|
969
|
|
970 #Get non-import Sets
|
|
971 map {push @previous_rep_sets, $_ if $_->name !~ /_IMPORT$/}
|
|
972 @{$rset_adaptor->fetch_all_by_Experiment_Analysis($self->experiment, $chip_anal)};
|
|
973
|
|
974
|
|
975 #rollback_ResultSet if possible?
|
|
976 #This is just checking if they are supporting, not actually rolling them back
|
|
977 if(@previous_rep_sets){
|
|
978 $self->log('Found previously stored ResultSets');
|
|
979
|
|
980 foreach my $prev_rset(@previous_rep_sets){
|
|
981 #This should not rollback anything, just return skipped sets
|
|
982 #i.e. sets which have a product feature set
|
|
983 #It also used to delete the supporting set records which may be important for redefining the DataSet below
|
|
984 my $rset_dset = $self->rollback_ResultSet($prev_rset);
|
|
985 push @supporting_rset_dsets, $rset_dset if @$rset_dset;
|
|
986 }
|
|
987 }
|
|
988
|
|
989 #Note: If we remove chips from an experiment, they are only removed from the non-import sets
|
|
990 #To fully remove them, you need to use the rollback_experiment.pl script with -chip_ids
|
|
991 #can we log this in get_import_ResultSet?
|
|
992
|
|
993 $self->log('Storing ResultSets');
|
|
994
|
|
995 #Store new tech, biol and toplevel type rsets
|
|
996 foreach my $new_rset(values %rsets){
|
|
997 my $replace_txt;
|
|
998
|
|
999 #Rename old set if we have a name/anal/type clash
|
|
1000 foreach my $prs(@supporting_rset_dsets){
|
|
1001
|
|
1002 my ($pset, $dset) = @$prs;
|
|
1003
|
|
1004 if($pset->log_label eq $new_rset->log_label){
|
|
1005 my $new_name = "OLD_".$rset->log_label;
|
|
1006 $self->log("Found update supporting ResultSet clash, renaming to:\t${new_name}");
|
|
1007 $self->unlink_ResultSet_DataSet($rset, $dset, $new_name);
|
|
1008
|
|
1009 #This pset dbID has already been removed
|
|
1010 #Will get updated with new rset dbID when updating DataSet
|
|
1011 $replace_txt = 'Proposed ResultSet(dbID) replacement for DataSet('.$dset->name."):\t".$pset->dbID.' > ';
|
|
1012 }
|
|
1013 }
|
|
1014
|
|
1015
|
|
1016 $new_rset->add_status('DAS_DISPLAYABLE');
|
|
1017 my ($new_rset) = @{$rset_adaptor->store($new_rset)};
|
|
1018
|
|
1019 if(defined $replace_txt){
|
|
1020 $self->log($replace_txt.$new_rset->dbID);
|
|
1021 }
|
|
1022 }
|
|
1023
|
|
1024 my $xml_file = open_file($self->get_config('mage_xml_file'));
|
|
1025
|
|
1026 #slurp in changing separator to null so we get it all in one string.
|
|
1027 $self->experiment->mage_xml(do{ local ($/); <$xml_file>});
|
|
1028 close($xml_file);
|
|
1029
|
|
1030 $self->experiment($self->db->get_ExperimentAdaptor->update_mage_xml_by_Experiment($self->experiment()));
|
|
1031 }
|
|
1032 }
|
|
1033
|
|
1034 return;
|
|
1035 }
|
|
1036
|
|
1037
|
|
1038 1;
|