Mercurial > repos > mahtabm > ensembl

diff variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/Nimblegen.pm @ 0:1f6dce3d34e0
Uploaded
author: mahtabm
date: Thu, 11 Apr 2013 02:01:53 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/Nimblegen.pm	Thu Apr 11 02:01:53 2013 -0400
@@ -0,0 +1,1221 @@
+#
+# EnsEMBL module for Bio::EnsEMBL::Funcgen::Parsers::Nimblegen
+#
+
+=head1 LICENSE
+
+  Copyright (c) 1999-2011 The European Bioinformatics Institute and
+  Genome Research Limited.  All rights reserved.
+
+  This software is distributed under a modified Apache license.
+  For license details, please see
+
+    http://www.ensembl.org/info/about/code_licence.html
+
+=head1 CONTACT
+
+  Please email comments or questions to the public Ensembl
+  developers list at <ensembl-dev@ebi.ac.uk>.
+
+  Questions may also be sent to the Ensembl help desk at
+  <helpdesk@ensembl.org>.
+
+=head1 NAME
+
+Bio::EnsEMBL::Funcgen::Parsers::Nimblegen
+
+=head1 SYNOPSIS
+
+  my $parser_type = "Bio::EnsEMBL::Funcgen::Parsers::Nimblegen";
+  push @INC, $parser_type;
+  my $imp = $class->SUPER::new(@_);
+
+
+=head1 DESCRIPTION
+
+This is a parser class which should not be instatiated directly, it 
+normally set by the Importer as the parent class.  Nimblegen contains meta 
+data and methods specific to NimbleGen arrays to aid parsing and importing of 
+experimental data.
+
+=cut
+
+package Bio::EnsEMBL::Funcgen::Parsers::Nimblegen;
+
+use Bio::EnsEMBL::Funcgen::Array;
+use Bio::EnsEMBL::Funcgen::ProbeSet;
+use Bio::EnsEMBL::Funcgen::Probe;
+use Bio::EnsEMBL::Funcgen::ProbeFeature;
+use Bio::EnsEMBL::Funcgen::FeatureType;
+use Bio::EnsEMBL::Funcgen::ExperimentalChip;
+use Bio::EnsEMBL::Funcgen::ArrayChip;
+use Bio::EnsEMBL::Funcgen::Channel;
+use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate );
+use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw(species_chr_num open_file);
+use Bio::EnsEMBL::Utils::Argument qw( rearrange );
+use Bio::EnsEMBL::Funcgen::Parsers::MAGE;
+
+use strict;
+
+use vars qw(@ISA);
+@ISA = qw(Bio::EnsEMBL::Funcgen::Parsers::MAGE);
+
+=head2 new
+
+  Example    : my $self = $class->SUPER::new(@_);
+  Description: Constructor method for Nimblegen class
+  Returntype : Bio::EnsEMBL::Funcgen::Parser::Nimblegen
+  Exceptions : throws if Experiment name not defined or if caller is not Importer
+  Caller     : Bio::EnsEMBL::Funcgen::Importer
+  Status     : at risk
+
+=cut
+
+
+sub new{
+  my $caller = shift;
+
+  my $class = ref($caller) || $caller;
+  my $self  = $class->SUPER::new(@_);
+
+  throw("This is a skeleton class for Bio::EnsEMBL::Importer, should not be used directly") if(! $self->isa("Bio::EnsEMBL::Funcgen::Importer"));
+	
+  #should we provide args override for all of these?
+
+  $self->{'config'} =  
+    {(
+      #order of these data arrays is important!
+	  #Remove these method arrays, snd just run them serially?
+      array_data   => ['experiment'],#Rename this!!
+      probe_data   => ["probe"],
+      results_data => ["and_import_results"],
+      sample_key_fields => ['DESIGN_ID', 'CHIP_ID', 'DYE', 'PROMOT_SAMPLE_TYPE'],# 'SAMPLE_LABEL'],label now optional
+      # 'SAMPLE_DESCRIPTION removed due to naming disparities
+      ndf_fields      => ['CONTAINER', 'PROBE_SEQUENCE', 'MISMATCH','FEATURE_ID', 'PROBE_ID'],#MISMATCH is always 0!
+      pos_fields      => ['CHROMOSOME', 'PROBE_ID', 'POSITION', 'COUNT'],
+      result_fields   => ['PROBE_ID', 'PM', 'X', 'Y'],
+      notes_fields   => ['DESIGN_ID', 'DESIGN_NAME', 'DESCRIPTION'],
+      norm_method => 'VSN_GLOG',
+      dye_freqs => {(
+		     Cy5 => 635,
+		     Cy3 => 532,
+		    )},
+
+
+	  #Need to make these definable?
+	  #have protocolfile arg and just parse tab2mage protocol section format
+	  protocols => {(
+					 grow          => {(
+										accession => 'GROW_NIMB',
+										name      => 'GROW NIMBLEGEN CULTURE CONDITIONS',
+										text      => 'Nimblegen culture conditions description here. Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
+										paramters => undef,
+									   )},
+					 treatment     => {(
+										accession => 'CROSSLINK_NIMB',
+										name      => 'NIMBLEGEN CHROMATIN PREPARATION',
+										text      => 'Nimblegen X-linking and DNA extraction protocol.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
+										paramters => undef,
+									   )},
+					 extraction    => {(
+										accession => 'CHROMATIN_IP_NIMB',
+										name      => 'NIMBLEGEN CHROMATIN IMMUNOPRECIPITATION and DNA RECOVERY',
+										text      => 'Nimblegen chromatin immunoprecipitation and DNA extraction protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
+										paramters => undef,
+									   )},
+					 labeling      => {(
+										accession => 'LABELLING_NIMB',
+										name      => 'NIMBLEGEN LABELLING',
+										text      => 'Nimblegen labelling protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
+										paramteres => undef,
+									   )},
+					 hybridization => {(
+										accession => 'HYBRIDISATION_NIMB',
+										name      => 'NIMBLEGEN HYBRIDISATION',
+										text      => 'Nimblegen chip hybridisation protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
+										parameters => undef,
+									   )},
+					 scanning      => {(
+										accession => 'SCANNING_NIMB',
+										name      => 'NIMBLESCAN',
+										text      => 'Nimblegen Nimblescan protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
+										paramters => undef,
+									   )},
+					)},
+
+     )};
+	
+  return $self;
+}
+
+
+=head2 set_config
+
+  Example    : my $self->set_config;
+  Description: Sets attribute dependent config
+  Returntype : None
+  Exceptions : None
+  Caller     : Bio::EnsEMBL::Funcgen::Importer
+  Status     : at risk
+
+=cut
+
+
+sub set_config{
+  my $self = shift;
+
+  #This should be general config for all types of import
+  #dirs are not set in config to enable generic get_dir method access
+  #This is really just setting paths rather than config rename?
+
+  #This is generic for all imports
+
+
+
+  if($self->{'old_dvd_format'}){
+	$self->{'design_dir'} = $self->get_dir('input').'/DesignFiles';
+  }else{
+	$self->{'design_dir'} = $self->get_dir('input').'/Design_information';
+  }
+
+  
+  if($self->{'old_dvd_format'}){
+	$self->{'config'}{'notes_file'} = $self->get_dir('input').'/DesignNotes.txt';
+  }else{
+	$self->{'config'}{'notes_file'} = $self->get_dir('design').'/DesignNotes.txt';
+  }
+  
+  $self->{'config'}{'chip_file'} = $self->get_dir('input').'/SampleKey.txt';
+
+
+
+  #Experiment(output) specific
+  #This should already be set in the run script
+  #As we could get log write errors before we have created the output dir otherwise
+  $self->{'output_dir'} ||= $self->get_dir("data").'/output/'.$self->{'param_species'}.'/'.$self->vendor().'/'.$self->name();
+
+  $self->{'config'}{'tab2mage_file'} = $self->get_dir('output').'/E-TABM-'.$self->name().'.txt';
+
+  $self->{'config'}{'mage_xml_file'} = $self->get_dir('output').'/{UNASSIGNED}.xml';
+
+  if($self->{'old_dvd_format'}){
+	$self->{'results_dir'} = $self->get_dir('input').'/PairData';
+  }else{
+	$self->{'results_dir'} = $self->get_dir('input').'/Raw_data_files';
+  }
+	
+  return;
+}
+
+
+
+
+=head2 read_array_data
+
+  Example    : $imp->read_array_data();
+  Description: Parses NimbleGen DesignNotes.txt files to create and store new Arrays
+  Returntype : none
+  Exceptions : None
+  Caller     : general
+  Status     : At risk - Can this be generic? Can we force the creation of a DesignNotes file on other formats?
+
+=cut
+
+
+sub read_array_data{
+  my ($self, $notes_file) = @_;
+
+
+  $notes_file ||= $self->get_config('notes_file');
+  my ($line, $array, $array_chip, @data, %hpos);
+  my $oa_adaptor = $self->db->get_ArrayAdaptor();
+  my $ac_adaptor = $self->db->get_ArrayChipAdaptor();
+
+  #Slurp file to string, sets local delimtter to null and subs new lines
+  my $fh = open_file($notes_file);
+  #($design_desc = do { local ($/); <$fh>;}) =~ s/\r*\n$//;
+  #close($fh);
+  
+  #Would be better if we only import the design info for the chips listed in the SampleKey.txt file
+  #Some cyclical dependency going on here :|
+
+  while ($line = <$fh>){
+    
+    $line =~ s/\r*\n//;#chump
+    @data =  split/\t/o, $line;
+
+    #We need to have a DESIGN vendor type?
+    #also need to be able to set file path independently of config
+    
+    if($. == 1){
+	  %hpos = %{$self->set_header_hash(\@data, $self->get_config('notes_fields'))};
+      next;
+    }
+
+    ### CREATE AND STORE Array and ArrayChips  
+    if(! defined $array ){
+      #This is treating each array chip as a separate array, unless arrayset is defined
+      #AT present we have no way of differentiating between different array_chips on same array???!!!
+      #Need to add functionality afterwards to collate array_chips into single array
+           
+      #This will use a stored array if present
+
+      $array = Bio::EnsEMBL::Funcgen::Array->new
+	(
+	 -NAME        => $self->array_name() || $data[$hpos{'DESIGN_NAME'}],
+	 -FORMAT      => uc($self->format()),
+	 -VENDOR      => uc($self->vendor()),
+	 -TYPE        => 'OLIGO',
+	 -DESCRIPTION => $data[$hpos{'DESCRIPTION'}],#need to trim the array chip specific description here
+	);
+
+      ($array) = @{$oa_adaptor->store($array)};
+
+      $array_chip = Bio::EnsEMBL::Funcgen::ArrayChip->new(
+							  -ARRAY_ID  => $array->dbID(),
+							  -NAME      => $data[$hpos{'DESIGN_NAME'}],
+							  -DESIGN_ID => $data[$hpos{'DESIGN_ID'}],
+							  #add description?
+							 );
+
+      #This will use a stored array_chip if present
+      ($array_chip) = @{$ac_adaptor->store($array_chip)};
+      $array->add_ArrayChip($array_chip);
+        
+    }
+    elsif((! $array->get_ArrayChip_by_design_id($data[$hpos{'DESIGN_ID'}])) && ($self->array_set())){
+      
+      $self->log("Generating new ArrayChip(".$data[$hpos{'DESIGN_NAME'}].") for same Array:\t".$array->name()."\n");
+      
+      $array_chip = Bio::EnsEMBL::Funcgen::ArrayChip->new(
+							  -ARRAY_ID  => $array->dbID(),
+							  -NAME        => $data[$hpos{'DESIGN_NAME'}],
+							  -DESIGN_ID => $data[$hpos{'DESIGN_ID'}],
+							 );
+      
+      ($array_chip) = @{$ac_adaptor->store($array_chip)};
+      $array->add_ArrayChip($array_chip);
+      
+    }
+    elsif(! $array->get_ArrayChip_by_design_id($data[$hpos{'DESIGN_ID'}])){
+      throw("Found experiment with more than one design without -array_set");
+    }
+  }
+  
+
+  $self->add_Array($array);
+
+  close($fh);
+  
+  return;
+
+}
+
+
+=head2 read_experiment_data
+
+  Example    : $imp->read_array_chip_data();
+  Description: Parses and imports array & experimental chip meta data/objects
+  Returntype : none
+  Exceptions : throws if more than one array/design found and not an "array set"
+  Caller     : Importer
+  Status     : At risk 
+
+=cut
+
+sub read_experiment_data{
+  my $self = shift;
+
+  $self->read_array_data();
+
+  my $t2m_file = $self->init_tab2mage_export() if $self->{'write_mage'};
+
+
+  my ($design_desc, $line, $tmp_uid, $channel, $echip, $sample_label);
+  my ($sample_desc, %hpos, @data, %uid_reps, %did_reps, %sample_reps);
+  my $ec_adaptor = $self->db->get_ExperimentalChipAdaptor();
+  my $chan_adaptor = $self->db->get_ChannelAdaptor();
+  my $br_cnt = 1;
+  my $tr_cnt = 1;
+  
+  #Currently 1 design = 1 chip = 1 array /DVD
+  #Different designs are not currently collated into a chip_set/array in any ordered manner
+  #Register each design as an array and an array_chip
+  #May want to group array_chips into array/chip sets by association though the API
+  
+ 
+  warn("Harcoded for one array(can have multiple chips from the same array) per experiment\n");
+  my $fh = open_file($self->get_config("chip_file"));
+  $self->log("Reading chip data");
+
+
+  #warn "Do we need to validate each line here against the header array?";
+
+  while ($line = <$fh>){
+    next if $line =~ /^\s+\r*\n/;
+    $line =~ s/\r*\n//;#chump
+    @data =  split/\t/o, $line;
+    #we could validate line against scalar of header array
+     #ORD_ID  CHIP_ID DYE     DESIGN_NAME     DESIGN_ID       SAMPLE_LABEL    SAMPLE_SPECIES  SAMPLE_DESCRIPTION      TISSUE_TREATMENT        PROMOT_SAMPLE_TYPE
+    if ($. == 1){
+      %hpos = %{$self->set_header_hash(\@data, $self->get_config('sample_key_fields'))};
+
+      #we need to set the sample description field name, as it can vary :(((
+      @data = grep(/SAMPLE_DESCRIPTION/, keys %hpos);
+      $sample_desc = $data[0];
+ 
+      throw("More than one sample description(@data) in ".$self->get_config("chip_file")."\n") if(scalar @data >1);
+      next;
+    }
+    
+    #Need to handle array class here i.e. two channel arrays will have two lines
+    #validate species here
+    #look up alias from registry and match to self->species
+    #registry may not be loaded for local installation
+
+
+    ### CREATE AND STORE ExperimentalChips
+    if ((! $tmp_uid) || ($data[$hpos{'CHIP_ID'}] ne $tmp_uid)){
+
+      #Test both channels are available, i.e. the SampleKey has two TOTAL channels
+      if($echip){
+
+		for my $type('TOTAL', 'EXPERIMENTAL'){
+
+		  my $test_chan =  $chan_adaptor->fetch_by_type_experimental_chip_id($type, $echip->dbID());
+		  throw("ExperimentalChip(".$echip->unique_id().
+				") does not have a $type channel, please check the SampleKey.txt file") if ! $test_chan;
+		  
+		}
+	  }
+      
+      $tmp_uid = $data[$hpos{'CHIP_ID'}];
+      $echip = $ec_adaptor->fetch_by_unique_id_vendor($data[$hpos{'CHIP_ID'}], 'NIMBLEGEN');
+	  
+      if($echip){
+
+		if(! $self->recovery()){
+		  throw("ExperimentalChip(".$echip->unqiue_id().
+				" already exists in the database\nMaybe you want to recover?");
+		}
+	  }else{
+
+		$echip =  Bio::EnsEMBL::Funcgen::ExperimentalChip->new
+		  (
+		   -EXPERIMENT_ID  => $self->experiment->dbID(),
+		   -DESCRIPTION    => $data[$hpos{$sample_desc}],
+           -FEATURE_TYPE   => $self->feature_type,
+           -CELL_TYPE      => $self->cell_type,
+		   -ARRAY_CHIP_ID  => $self->arrays->[0]->get_ArrayChip_by_design_id($data[$hpos{'DESIGN_ID'}])->dbID(),
+		   -UNIQUE_ID      => $data[$hpos{'CHIP_ID'}],
+           #-BIOLOGICAL_REPLICATE => ,
+           #-TECHNICAL_REPLICATE => ,
+		  );
+		
+		($echip) = @{$ec_adaptor->store($echip)};
+		$self->experiment->add_ExperimentalChip($echip);
+      }
+    }
+
+
+    ### CREATE AND STORE Channels
+	my $type = uc($data[$hpos{'PROMOT_SAMPLE_TYPE'}]);
+	my $sample_label = (! exists $hpos{'SAMPLE_LABEL'}) ? '' :  $data[$hpos{'SAMPLE_LABEL'}];
+
+
+	$type = 'TOTAL' if ($type ne 'EXPERIMENTAL');
+    $channel = $chan_adaptor->fetch_by_type_experimental_chip_id($type, $echip->dbID());
+
+    if($channel){
+      if(! $self->recovery()){
+		throw("Channel(".$echip->unqiue_id().":".uc($data[$hpos{'PROMOT_SAMPLE_TYPE'}]).
+			  " already exists in the database\nMaybe you want to recover?");
+	  }else{
+		#push @{$self->{'_rollback_ids'}}, $channel->dbID();
+		#No point in doing this as all Channels mey be pre-registered in recovery mode
+		#Hence all will be rolled back
+	  }
+    }else{
+      #Handles single/mutli
+   	  
+      $channel =  Bio::EnsEMBL::Funcgen::Channel->new
+		(
+		 -EXPERIMENTAL_CHIP_ID => $echip->dbID(),
+		 -DYE                  => $data[$hpos{'DYE'}],
+		 -SAMPLE_ID            => $sample_label,
+		 -TYPE                 => $type,
+		);
+    
+      #-SPECIES              => $self->species(),#on channel/sample to enable multi-species chip/experiment
+      #would never happen on one chip?  May happen between chips in one experiment
+	  
+      ($channel) = @{$chan_adaptor->store($channel)};
+
+	}
+
+	#we need to build the channel level tab2mage line here
+
+	#For each BR there will be two sample_labels, one for each channel
+	#These will be used across multiple chips.
+	#If two chips the same design ID and the same sample labels, then we have a technical replicate
+	#else if they have different sample labels then we have another biological replicate
+	#We have a problem of associating channels to the same BR with differing sample labels
+	#This is solved by checking whether the chip ID has already been registered in a BR
+
+	#This fails if more than one sample label is used for any given BR
+	#This will result in the BR being split into the number of unique sample label pairs(Experimental/Control  channel)
+	#This also fails if the same sample label has been used for two different BRs
+
+	if($self->{'write_mage'}){
+	  #my $sample_name = ($sample_label eq '') ? '???' : substr($sample_label, 0, (length($sample_label)-1));
+	  my $ctype_name = (defined $self->cell_type()) ? $self->cell_type->name() : '???';
+	  my $ftype_name = (defined $self->feature_type()) ? $self->feature_type->name() : '???';
+	  my $ctype_desc = (defined $self->cell_type()) ? $self->cell_type->description() : '???';
+
+	  #define reps
+
+
+	  #we need one to get the biorep based on the sample label and making sure the unique ID are the same
+	  #we need to define the tech rep by matching the sample label and the making sure the design_id isn't already used
+
+	  #Is this doing the BR assignment properly?
+
+	  if(exists $sample_reps{$sample_label}){#Found chip in a previously seen BR
+		#Register the BR of this chip ID
+		$uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'} = $sample_reps{$sample_label};
+
+	  }
+	  elsif(exists $uid_reps{$data[$hpos{'CHIP_ID'}]}){#Found the other channel
+		$sample_reps{$sample_label} = $uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'};
+	  }
+	  else{#assign new br
+		$sample_reps{$sample_label} = $br_cnt;                  #Assign BR to sample label
+		$uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'} = $br_cnt;     #Assign BR to chip id
+		$br_cnt++;
+	  }
+
+
+
+	  #Something is going awry here. The TR is not being reset for some new BRs
+	  
+	  if(! exists $uid_reps{$data[$hpos{'CHIP_ID'}]}{'tr'}){
+		#we only assign a new tr here if this design has not been seen in any of the reps
+		#i.e. we need to get the first tr which does not contain this design_id
+
+		my $create_rep = 1;
+		my $tr;
+		my @chip_ids;
+		my $br = 	$uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'};
+
+		foreach my $chip_id(keys %uid_reps){
+		  
+		  push @chip_ids, $chip_id if($uid_reps{$chip_id}{'br'} == $br);
+		}
+
+		#This is looping through all the TRs for all the design IDs
+	   
+		foreach my $rep(sort keys %did_reps){
+		  #Doesn't exist for the given BR?
+		  #So we need to get all the chip_ids for a given br
+		  #Check wether it exists and wether it exists in did_reps and check wether the chip_id value is part of the BR set
+		  #else we add it
+																				 
+		  if(! exists $did_reps{$rep}{$data[$hpos{'DESIGN_ID'}]}){
+			#Not seen in a TR of this $rep yet
+			$create_rep = 0;
+		  }elsif(! grep(/$did_reps{$rep}{$data[$hpos{'DESIGN_ID'}]}/, @chip_ids)){
+			#Not seen in this BR with this TR $rep
+			$create_rep = 0;
+		  }
+
+		  if(! $create_rep){
+			#Design ID not seen so add to this TR
+			$did_reps{$rep}{$data[$hpos{'DESIGN_ID'}]} = $data[$hpos{'CHIP_ID'}]; #don't really need to assign this
+			$tr = $rep;
+			last;#do not remove this or we get wierd TR incrementing
+		  }
+		}
+
+		
+		if($create_rep){
+		  #Get the next TR value for this given BR
+		  my @trs;
+
+		  foreach my $rep(keys %did_reps){
+			
+			foreach my $chip_id(values %{$did_reps{$rep}}){
+			  
+			  #Push TR if chip_id is present in this BR
+			  push @trs, $rep if(grep(/$chip_id/, @chip_ids));
+			}
+		  }
+		  ($tr) = sort {$b<=>$a} @trs;
+
+		  $tr ||=0;
+		  $tr++;
+		  
+		  #register design ID to chip ID mapping for this TR
+		  $did_reps{$tr}{$data[$hpos{'DESIGN_ID'}]} = $data[$hpos{'CHIP_ID'}];
+		}
+
+		#register TR for this chip ID
+		$uid_reps{$data[$hpos{'CHIP_ID'}]}{'tr'} = $tr;
+	  }
+
+	  my $br = $self->experiment->name().'_BR'.	$uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'};
+	  my $tr = $br.'_TR'.$uid_reps{$data[$hpos{'CHIP_ID'}]}{'tr'};
+
+
+	  #File[raw]
+	  my $tsm_line = $echip->unique_id().'_'.$self->get_config('dye_freqs')->{$data[$hpos{'DYE'}]}.'_pair.txt';
+	  #Array[accession] # Should this be left blank for AE accession?
+	  $tsm_line .= "\t".$data[$hpos{'DESIGN_ID'}];
+	  #Array[serial]
+	  $tsm_line .= "\t".$echip->unique_id();
+
+	  #Protocol(s)[grow][treatment][extraction][labelling][hybridisation][scanning]
+	  foreach my $protocol(sort (keys %{$self->get_config('protocols')})){
+		$tsm_line .= "\t".$self->get_config('protocols')->{$protocol}->{'accession'};
+	  }
+
+	  
+	  #BioSource
+	  $tsm_line .= "\t$ctype_name";
+	  #Sample
+	  $tsm_line .= "\t$br";
+	  #Extract 
+	  $tsm_line .= "\t$tr";
+
+	  #LabeledExtract & Immunoprecipitate
+	  if($type eq 'EXPERIMENTAL'){
+		$tsm_line .= "\t$sample_label - IP of $tr with anti $ftype_name (Ab vendor, Ab ID)";
+		$tsm_line .= "\t$tr IP";
+	  }else{
+		$tsm_line .= "\t$sample_label - Input control DNA of $tr\t";
+	  }
+		
+	  #Hybridization	
+	  #U2OS BR1_TR1 ChIP H3KAc 46092 hyb
+	  $tsm_line .= "\t$ctype_name $tr ChIP $ftype_name ".$echip->unique_id().' hyb';
+	  
+	  #BioSourceMaterial    SampleMaterial	ExtractMaterial	LabeledExtractMaterial
+	  $tsm_line .= "\tcell\tgenomic_DNA\tgenomic_DNA\tsynthetic_DNA";
+	  
+	  #Dye	
+	  $tsm_line .= "\t".$data[$hpos{'DYE'}];
+
+	  #BioMaterialCharacteristics[Organism]
+	  $tsm_line .= "\t".$self->species();
+	  
+	  #BioMaterialCharacteristics[BioSourceType]
+	  $tsm_line .= "\tfrozen_sample";
+
+	  #BioMaterialCharacteristics[StrainOrLine]	
+	  $tsm_line .= "\t$ctype_name";
+
+	  #BioMaterialCharacteristics[CellType]
+	  $tsm_line .= "\t$ctype_name";
+
+	  #BioMaterialCharacteristics[Sex]
+	  $tsm_line .= "\t???";
+	  #FactorValue[StrainOrLine]	
+	  $tsm_line .= "\t$ctype_name";
+	  #FactorValue[Immunoprecipitate]
+	  $tsm_line .=  ($type eq 'EXPERIMENTAL') ? "\tanti-${ftype_name} antibody\n" : "\t\n";
+
+	  print $t2m_file $tsm_line;
+
+	}
+   
+  }
+  
+  close($t2m_file) if $self->{'write_mage'};
+  close($fh);
+
+  return;
+}
+
+
+
+
+=head2 read_probe_data
+
+  Example    : $imp->read_probe_data();
+  Description: Parses and imports probes, probe sets and features of a given array
+               No duplicate handling or probe caching is performed due to memory 
+               issues, this is done in resolve_probe_data.
+  Returntype : none
+  Exceptions : none
+  Caller     : Importer
+  Status     : Medium
+
+=cut
+
+
+#Assumes one chip_design per experimental set.
+sub read_probe_data{
+  my ($self) = shift;
+  
+  my ($fh, $line, @data, %hpos, %probe_pos);#, %duplicate_probes);
+  $self->log("Parsing and importing ".$self->vendor()." probe data (".localtime().")", 1);
+
+  ### Read in
+  #ndf file: probe_set, probe and probe_feature(.err contains multiple mappings)
+  #pos file: probe chromosome locations
+  
+  #Need to change how probe_names are generated for nimblegen?
+  #native probe_ids may not be unique, but should be when combined with the seq_id which is currently being used as the xref_id
+  #Handle with API!!
+  
+  #READ REGION POSITIONS
+  #We need to handle different coord systems and possibly different assmemblies
+  my $slice_a = $self->db->get_SliceAdaptor();
+  my $cs = $self->db->get_FGCoordSystemAdaptor()->fetch_by_name('chromosome');
+
+
+  #TIED FILE CACHE!!!
+  #We need to rebuild the cache from the DB before we start adding new probe info
+  #We only need to rebuild cache if we find a chip that hasn't been imported?
+  #No, we just need to import without cache, then re-do the resolve step
+  #Are we still going to get disconnects when we dump the cache?
+
+  
+  #warn "Read probe data should only read in the array chips which are specified by the ExperimentalChip?  Not just what is present in the DesignNotes file?";
+
+  
+  foreach my $array(@{$self->arrays()}){
+    
+    foreach my $achip(@{$array->get_ArrayChips()}){
+
+      my (@log, %probe_pos, $fasta_file, $f_out);
+	  #do we need to fetch probe by seq and array?
+	  #this would also id non-unique seqs in design
+
+      #warn "We need to account for different cs feature amppings here
+
+	  if($achip->has_status('IMPORTED')){
+		$self->log("Skipping fully imported ArrayChip:\t".$achip->design_id());
+		next;
+      }elsif($self->recovery()){
+		$self->log("Rolling back ArrayChip:\t".$achip->design_id());
+		$self->rollback_ArrayChips([$achip]);
+      }
+      
+      $self->log("Importing ArrayChip:".$achip->design_id());
+       
+      #Always use pos file, ndf file cannot be guranteed to contain all location info
+      #pos file also gives a key to which probes should be considered 'EXPERIMENTAL'
+      
+      #CACHE PROBE POSITIONS
+      $fh = open_file($self->get_dir("design")."/".$achip->name().".pos");
+      
+      #don't % = map ! Takes a lot longer than a while ;)
+      while($line = <$fh>){
+		$line =~ s/\r*\n//o;#Not using last element
+		@data =  split/\t/o, $line;
+	
+		#SEQ_ID	CHROMOSOME	PROBE_ID	POSITION	COUNT
+		if ($. == 1){
+		  %hpos = %{$self->set_header_hash(\@data, $self->get_config('pos_fields'))};
+		  next;
+		}
+		
+		#Skip probe if there is a duplicate
+		if(exists $probe_pos{$data[$hpos{'PROBE_ID'}]}){
+		  
+		  if($data[$hpos{'CHROMOSOME'}] eq $probe_pos{$data[$hpos{'PROBE_ID'}]}->{chr} &&
+			 ($data[$hpos{'POSITION'}]+1) eq $probe_pos{$data[$hpos{'PROBE_ID'}]}->{start}){
+			#log or warn here?
+			
+			#Not matching probe length here
+			
+			next;
+			#Do we need to skip this in the ndf file too?
+
+		  }
+		  else{
+			throw("Found duplicate mapping for ".$data[$hpos{'PROBE_ID'}].
+				  " need implement duplicate logging/cleaning");
+		  }
+		  #need to build duplicate hash to clean elements from hash
+		  # $duplicate_probes{$data[$hpos{'PROBE_ID'}]} = 1;
+		  #next;
+		}
+		
+		
+		my $random = 0;
+
+		if(! $self->cache_slice($data[$hpos{'CHROMOSOME'}])){
+		  push @log, "Skipping feature import for probe ".$data[$hpos{'PROBE_ID'}]." with non-standard region ".$data[$hpos{'CHROMOSOME'}];
+
+		  #should we try and resolve the random chrs here?
+		  #at least store probe/set/result and skip feature
+		  #can we just import as chr with no start postition?
+		  
+		  #if ($data[$hpos{'CHROMOSOME'}] =~ /_random/){
+		  
+		  #	if(! $self->cache_slice($data[$hpos{'CHROMOSOME'}])){
+		   #push @log, "Skipping probe ".$data[$hpos{'PROBE_ID'}]." with non-standard region ".$data[$hpos{'CHROMOSOME'}];
+			#}else{
+		  #we should really log this in a seprate file to avoid overloading the lgo file
+		  # push @log, "Importing random probe ".$data[$hpos{'PROBE_ID'}]." on ".$data[$hpos{'CHROMOSOME'}]." omitting position";
+		  
+		#}
+
+		#}
+		  
+		}
+		
+		#This is not handling probes with random chrs
+
+		$probe_pos{$data[$hpos{'PROBE_ID'}]} = {(
+												 chr => $data[$hpos{'CHROMOSOME'}],
+												 start => ($data[$hpos{'POSITION'}] +1),#default UCSC->Ensembl coord conversion
+												)};
+		
+      }
+
+
+	  
+      
+      #Remove duplicate probes 
+	  
+	  $self->log("Built position cache from : ".$achip->name().".pos", 1);
+      close($fh);
+      
+	  $self->log("Importing design probes from : ".$achip->name().".ndf");
+      #OPEN PROBE IN/OUT FILES
+      $fh = open_file($self->get_dir("design")."/".$achip->name().".ndf");
+      #Need to set these paths in each  achip hash, file names could be tablename.chip_id.txt
+ 
+	  #Need to add dbname/port/species/vendor to this path?
+	  #.efg DropDatabase should also clean the fasta dumps and caches for a given DB
+	  
+	  if($self->dump_fasta()){
+		$fasta_file = $self->get_dir('fastas').'/'.$achip->name().".fasta";
+		$self->backup_file($fasta_file);
+		$f_out = open_file($fasta_file, '>');
+	  }
+
+
+      my ($length, $ops, $op, $of, %pfs);
+
+      #should define mapping_method arg to allows this to be set to LiftOver/EnsemblMap
+      my $anal = $self->db->get_AnalysisAdaptor()->fetch_by_logic_name("VendorMap");
+			
+		
+      my $strand = 0;	#default for nimblegen, should be config hash?
+
+      #my $cig_line = "50M";	#default for nimblegen, should be config hash?
+	  #probe length can change within design, should be built from length
+
+
+      my $fasta = "";
+      
+      #$self->Timer()->mark("Starting probe loop");
+      
+
+	  #This is leaking about 30-60MB for each normal density chip?
+	  #need Devel::Monitor here?
+
+
+      while($line = <$fh>){
+		$line =~ s/\r*\n//;
+		@data =  split/\t/o, $line;
+		my $loc = "";
+		my $class = "EXPERIMENTAL";
+		
+		#PROBE_DESIGN_ID	CONTAINER	DESIGN_NOTE	SELECTION_CRITERIA	SEQ_ID	PROBE_SEQUENCE	MISMATCH	MATCH_INDEX	FEATURE_ID	ROW_NUM	COL_NUM	PROBE_CLASS	PROBE_ID	POSITION	DESIGN_ID	X	Y
+		#2067_0025_0001  BLOCK1          0       chrX    TTAGTTTAAAATAAACAAAAAGATACTCTCTGGTTATTAAATCAATTTCT      0       52822449        52822449        1       25      experimental    chrXP10404896   10404896        2067    25      1
+		
+		if ($. == 1){	
+		  %hpos = %{$self->set_header_hash(\@data, $self->get_config('ndf_fields'))};
+		  next;
+		}
+		
+		
+		if (! exists $probe_pos{$data[$hpos{'PROBE_ID'}]}){
+		  push @log, "Skipping non-experimental probe:\t".$data[$hpos{'PROBE_ID'}];
+		  next;
+		}
+		
+		#Which non-experimental probes might we want to store?
+		#if($data[$hpos{'CONTAINER'}] =~ /control/io){
+		#	$class = "CONTROL";
+		#}
+		#elsif($data[$hpos{'CONTAINER'}] =~ /random/io){
+		#	$class = "RANDOM";
+		#}
+		#elsif($data[$hpos{'PROBE_CLASS'}] !~ /experimental/io){
+		#	$class = "OTHER";
+		#}
+		#elsif(! exists $probe_pos{$data[$hpos{'PROBE_ID'}]}){	#HACKY HACKY HACK HACK!! Needed for valid region retrival
+		#  $class = "OTHER";
+		#}
+		#SPIKE INS?
+		
+		
+		
+		#This assumes all probes in feature/probeset are next to each other!!!!!!!!!!
+		
+		
+		if($data[$hpos{'FEATURE_ID'}] != $data[$hpos{'MATCH_INDEX'}]){#Probe set data
+		  #print "Generating new probeset:\tFeature id:\t".$data[$hpos{'FEATURE_ID'}]."\tmatchindex:\t".$data[$hpos{'MATCH_INDEX'}]."\n";
+		  
+		  if($ops && ($data[$hpos{'FEATURE_ID'}] ne $ops->name())){
+			#THis is where we chose to update/validate
+			#Do we need to pass probes if they're already stored..may aswell to reduce mysql load?
+			#No point as we have to query anyway
+			$self->store_set_probes_features($achip->dbID(), \%pfs, $ops);
+			throw("ops still defined in caller") if defined $ops;
+		  }
+		  
+		  $ops = Bio::EnsEMBL::Funcgen::ProbeSet->new(
+													  -NAME => $data[$hpos{'FEATURE_ID'}],
+													  -SIZE => undef,
+													  -FAMILY  => $data[$hpos{'CONTAINER'}],
+													  #xref_id => $data[$hpos{'SEQ_ID'}],#Need to populate xref table
+													 );
+		  
+		  #should we store straight away or build a probeset/probe/feature set, and then store and validate in turn?
+		  #Store directly have separate method to validate and update?
+		  #would need to check if one exists before storing anyway, else we could potentially duplicate the same probe/probeset from a different array
+		  #remember for affy we need duplicate probe records with identical probe ids, probeset records unique across all arrays
+		  
+		  undef %pfs
+		}
+		elsif($. > 2){#may have previous ops set, but next has no ops, or maybe just no ops's at all
+		  $self->store_set_probes_features($achip->dbID(), \%pfs, $ops);
+		  throw("ops still defined in caller") if defined $ops;
+		}
+	
+		
+		###PROBES
+		#should we cat $xref_id to $probe_id here to generate unique id?
+		#would be messy to handle in the code, but would have to somewhere(in the retrieval code)
+		
+		$length = length($data[$hpos{'PROBE_SEQUENCE'}]);	
+		#$probe_string .= "\t${psid}\t".$data[$hpos{'PROBE_ID'}]."\t${length}\t$ac_id\t${class}\n";
+		
+		$op = Bio::EnsEMBL::Funcgen::Probe->new(
+												-NAME          => $data[$hpos{'PROBE_ID'}],
+												-LENGTH        => $length,
+												-ARRAY         => $array,
+												-ARRAY_CHIP_ID => $achip->dbID(),
+												-CLASS         => $class,
+											   );
+		
+		
+		%{$pfs{$data[$hpos{'PROBE_ID'}]}} = (
+											 probe => $op,
+											 features => [],
+											);
+		
+				
+		###PROBE FEATURES
+		#How can we be certain that we have the same mapping in the DB?
+		#Put checks in here for build?
+		#Need to handle controls/randoms here
+		#won't have features but will have results!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+		#The format of the pos file looks like it should have all the data required, but 
+		# chromsome is missing, first undef :(
+		#Need to use $pos here instead of .pos file
+		#However have problems defining probe class, as not populated in test set
+		#derive from container! :(
+		#Ignore controls/random as they won't have a region
+		#Also need to handle multiple mappings? 
+		#As results reference probes, no features, can have multiple features on different builds
+		
+		#if($class eq "EXPERIMENTAL"){
+		
+		#if(exists $regions{$data[$hpos{'SEQ_ID'}]}){
+		#  $fid++;
+		#  $pf_string .= "\t".$regions{$data[$hpos{'SEQ_ID'}]}{'seq_region_id'}."\t".$data[$hpos{'POSITION'}]."\t".
+		#    ($data[$hpos{'POSITION'}] + $length)."\t${strand}\t".$regions{$data[$hpos{'SEQ_ID'}]}{'coord_system_id'}.
+		#	"\t${pid}\t${anal_id}\t".$data[$hpos{'MISMATCH'}]."\t${cig_line}\n";
+		#$loc .= $regions{$data[$hpos{'SEQ_ID'}]}{'seq_region_id'}.":".$data[$hpos{'POSITION'}].
+		#  "-".($data[$hpos{'POSITION'}] + $length).";" if ($self->{'_dump_fasta'});
+		# }
+		# else{ 
+		#   die("No regions defined for ".$data[$hpos{'SEQ_ID'}]." ".$data[$hpos{'PROBE_ID'}].
+		#" with family ".$data[$hpos{'CONTAINER'}]);
+		#  }
+		
+	
+				
+	
+		if ($self->dump_fasta()){
+		  #(my $chr = $probe_pos{$data[$hpos{'PROBE_ID'}]}->{'chr'}) =~ s/chr//;
+
+		  #$loc .= $chr.":".$probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'}."-".
+		#	($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'}+ $length).";";
+	
+		  #$fasta .= ">".$data[$hpos{'PROBE_ID'}]."\t".$data[$hpos{'CHROMOSOME'}].
+		  #	"\t$loc\n".$data[$hpos{'PROBE_SEQUENCE'}]."\n";
+
+
+
+		  #filter controls/randoms?  Or would it be sensible to see where they map
+		  #wrap seq here?
+		  #$fasta .= ">".$data[$hpos{'PROBE_ID'}]."\n".$data[$hpos{'PROBE_SEQUENCE'}]."\n";
+
+
+		  #To use this for mapping, we really need the dbID nr fasta
+		  #This can be generated after the import, or maybe during resolve?
+		  #This is also currently done on a chip level, where as the cache is resolved at the array level
+		  #We could simply cat the files before resolving the fasta file
+		  #Need to do this otherwise we risk overwriting the fasta file with incomplete data.
+		  #Can we validate sequence across probes with same name in this step?
+		  #Just use probe name for now.
+		  
+		  #We could cat and sort the fastas to make sure we have the same sequences
+		  #Need to dump the design_id in the fasta header
+		  #This would also reduce IO on the DB as identical probe will be consecutive, hence just one query to get the id.
+
+		  #Changed th format an content of this to facilitate dbID nr fasta file generation and sequence validation
+		  
+		  $fasta .= ">".$data[$hpos{'PROBE_ID'}]."\t".$achip->design_id."\n".$data[$hpos{'PROBE_SEQUENCE'}]."\n";
+
+		  #Print fasta every 10000 lines
+		  if(! ($. % 10000)){
+			print $f_out $fasta;
+			$fasta = '';
+		  }
+		}
+
+
+		#Hack!!!!!! Still importing probe (and result?)
+		next if(!  $self->cache_slice($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'chr'}));
+		#warn("Skipping non standard probe (".$data[$hpos{'PROBE_ID'}].") with location:\t$loc\n");
+		  				
+		
+		$of = Bio::EnsEMBL::Funcgen::ProbeFeature->new
+		  (
+		   -START         => $probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'},
+		   -END           =>($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'}  + $length),
+		   -STRAND        => $strand,
+		   -SLICE         => $self->cache_slice($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'chr'}),
+		   -ANALYSIS      => $anal,
+		   -MISMATCHCOUNT => $data[$hpos{'MISMATCH'}],#Is this always 0 for import? remove from header hash?
+		   -PROBE         => undef,	#Need to update this in the store method
+		  );
+				
+	
+		push @{$pfs{$data[$hpos{'PROBE_ID'}]}{'features'}}, $of;
+	
+      }
+      
+      #need to store last data here
+      $self->store_set_probes_features($achip->dbID(), \%pfs, $ops);
+      $self->log(join("\n", @log));
+      $achip->adaptor->store_status("IMPORTED", $achip);
+	  $self->log("ArrayChip:\t".$achip->design_id()." has been IMPORTED");
+      
+      if ($self->dump_fasta()){
+		print $f_out $fasta;
+		close($f_out);
+      }
+    
+	  $self->log("Imported design from:\t".$achip->name().".ndf", 1);
+		
+
+
+	  #$self->{'_probe_cache'} = undef;#As we can't get Y and Y info from the DB, this is only possible as the results files contain X and Y info
+	}
+	
+    #Should we build hash of probe_names:probe_feature_ids here for results import
+    #Should we dump this as a lookup file for easier recoverability
+    #This is the biggest step in the import
+    #Building the hash would be fastest but least recoverable
+    #Would have to write recover statments for each step i.e. build the most recent data structure required for the next import step
+    #Individual queries for each result would take ages
+    #This is all assuming there are no random records in the table i.e. ID series is linear with no gaps.
+    
+    
+    #Could throw a random validation check in every X entries?
+    #This would only work for non-parallel imports
+    #periodic import when hit new probe_set, but no new data printed
+		
+  }
+  
+  
+  $self->log("Finished parsing probe data");
+  #Total probe_sets:\t$psid\n".
+  #	       "Total probes:\t$pid\nTotal probe_features:\t$fid");
+ 
+
+  $self->resolve_probe_data();
+ 
+  return;
+}
+
+
+
+
+=head2 read_and_import_results_data
+
+  Example    : $imp->read_results_data();
+  Description: Parses and dumps raw results to file
+  Returntype : none
+  Exceptions : none
+  Caller     : Importer
+  Status     : at risk 
+
+=cut
+
+ 
+sub read_and_import_results_data{
+  my $self = shift;
+  
+  $self->log("Parsing ".$self->vendor()." results");
+  my (@header, @data, @design_ids, @lines);
+  my ($fh, $pid, $line, $file);
+  my $anal = $self->db->get_AnalysisAdaptor->fetch_by_logic_name("RawValue");
+  my $result_set = $self->get_import_ResultSet($anal, 'channel');
+
+
+  if ($result_set) {			#we have some new data to import
+    
+    foreach my $echip (@{$self->experiment->get_ExperimentalChips()}) {
+	  
+      #if( ! $echip->has_status('IMPORTED')){
+		
+	  foreach my $chan (@{$echip->get_Channels()}) {
+		
+		if ( ! $chan->has_status('IMPORTED')) {
+
+		  #we need to set write_mage here
+
+		  my $array = $echip->get_ArrayChip->get_Array();
+		  
+		  $self->get_probe_cache_by_Array($array) || throw('Failed to get the probe cache handle for results import, resolve cache here?');			
+		  my ($probe_elem, $score_elem, %hpos);
+		  my $cnt = 0;
+		  my $r_string = "";
+		  my $chan_name = $echip->unique_id()."_".$self->get_config('dye_freqs')->{$chan->dye()};
+		  my $cc_id = $result_set->get_chip_channel_id($chan->dbID());
+		  
+		  
+		  #if ($self->recovery()) {
+		#	$self->log("Rolling back results for channel:\t${chan_name}");
+		#	$self->db->rollback_results($cc_id);
+		#  }
+		  
+		  #open/backup output
+		  my $out_file = $self->get_dir("raw")."/result.".$chan_name.".txt";	
+		  $self->backup_file($out_file);
+		  my $r_out = open_file($out_file, '>');
+		  
+		  (my $alt_chan_name = $chan_name) =~ s/\_/\_1h\_/;
+		  my $found = 0;
+		  
+		FILE: foreach my $name($chan_name, $alt_chan_name){
+			
+			foreach my $suffix ("_pair.txt", ".pair", ".txt") {
+			  
+			  $file = $self->get_dir("results")."/".$name.$suffix;
+			  
+			  if (-f $file) {
+				$found = 1;
+				last FILE;
+			  }
+			}
+		  }
+		  
+		  throw("Could not find result file for Channel(${chan_name}) in ".$self->get_dir('results')) if ! $found;
+		  
+		  #open/slurp input
+		  $self->log("Reading result for channel $chan_name:\t$file", 1);
+		  $fh = open_file($file);	
+		  @lines = <$fh>;
+		  close($fh);
+		  
+					
+		  ###PROCESS HEADER
+		  
+		  foreach my $i (0..$#lines) {
+			
+			if ($lines[$i] =~ /PROBE_ID/o) {
+			  $lines[$i] =~ s/\r*\n//o;
+			  @data = split/\t/o, $lines[$i];
+			  
+			  %hpos = %{$self->set_header_hash(\@data, $self->get_config('result_fields'))};
+			  
+			  #remove header
+			  splice @lines, $i, 1;
+			  
+			  last;				#finished processing header
+			}
+		  }
+		  
+		  #we need to sort the result files based on the unique key(name at present, should replace with seq at some point)
+		  @lines = sort {(split/\t/o, $a)[$hpos{'PROBE_ID'}] cmp (split/\t/o, $b)[$hpos{'PROBE_ID'}]} @lines;
+		  
+		  $self->log('Parsing results', 1);
+		  
+		  foreach $line(@lines) {
+			
+			#can we preprocess effectively?
+			next if $line =~ /^#/;
+			next if $line =~ /NGS_CONTROLS/;
+			next if $line =~ /V_CODE/;
+			next if $line =~ /H_CODE/;
+			next if $line =~ /RANDOM/;
+			
+			$line =~ s/\r*\n//o;
+			@data = split/\t/o, $line;
+			
+			###PROCESS HEADER
+			#if ($line =~ /PROBE_ID/o){
+			#  
+			#  %hpos = %{$self->set_header_hash(\@data, $self->get_config('result_fields'))};
+			#  next;#finished processing header
+			#}
+			
+			###PROCESS DATA
+			#Is this string concat causing the slow down, would it befaster to use an array and print a join?
+			
+			if ($pid = $self->get_probe_id_by_name_Array($data[$hpos{'PROBE_ID'}], $array)) {
+			  $cnt ++;
+			  $r_string .= '\N'."\t${pid}\t".$data[$hpos{'PM'}]."\t${cc_id}\t".$data[$hpos{'X'}]."\t".$data[$hpos{'Y'}]."\n";
+			} else {
+			  warn "Found unfiltered non-experimental probe in input $data[$hpos{'PROBE_ID'}]";
+			}
+			
+			###PRINT SOME RESULTS
+			if ($cnt > 10000) {
+			  $cnt = 0;
+			  print $r_out $r_string;
+			  $r_string ="";
+			  #could we fork here and import in the background?
+			}
+			
+		  } 
+		  #PRINT/CLOSE Channel file
+		  print $r_out $r_string;
+		  close($r_out);
+		  $self->log("Finished parsing $chan_name result", 1);
+		  
+		  #Import directly here to avoid having to reparse all results if we crash!!!!
+		  $self->log("Importing:\t$out_file");
+		  $self->db->load_table_data("result",  $out_file);
+		  $self->log("Finished importing:\t$out_file", 1);
+		  $chan->adaptor->store_status('IMPORTED', $chan);
+		  
+		  
+		}
+	  }
+	}
+  } 
+  else {
+	$self->log("Skipping results parse and import");
+  }
+  
+  $self->log("Finished parsing and importing results");
+  
+  return;
+}
+  
+
+
+1;
author	mahtabm
date	Thu, 11 Apr 2013 02:01:53 -0400
parents
children