annotate variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/Nimblegen.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2 # EnsEMBL module for Bio::EnsEMBL::Funcgen::Parsers::Nimblegen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 Copyright (c) 1999-2011 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 developers list at <ensembl-dev@ebi.ac.uk>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 Bio::EnsEMBL::Funcgen::Parsers::Nimblegen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 =head1 SYNOPSIS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 my $parser_type = "Bio::EnsEMBL::Funcgen::Parsers::Nimblegen";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 push @INC, $parser_type;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 my $imp = $class->SUPER::new(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 This is a parser class which should not be instantiated directly; it is
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 normally set by the Importer as the parent class. Nimblegen contains meta
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 data and methods specific to NimbleGen arrays to aid parsing and importing of
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 experimental data.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 package Bio::EnsEMBL::Funcgen::Parsers::Nimblegen;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 use Bio::EnsEMBL::Funcgen::Array;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 use Bio::EnsEMBL::Funcgen::ProbeSet;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 use Bio::EnsEMBL::Funcgen::Probe;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 use Bio::EnsEMBL::Funcgen::ProbeFeature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 use Bio::EnsEMBL::Funcgen::FeatureType;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 use Bio::EnsEMBL::Funcgen::ExperimentalChip;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 use Bio::EnsEMBL::Funcgen::ArrayChip;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 use Bio::EnsEMBL::Funcgen::Channel;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 use Bio::EnsEMBL::Utils::Exception qw( throw warning deprecate );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw(species_chr_num open_file);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 use Bio::EnsEMBL::Funcgen::Parsers::MAGE;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 @ISA = qw(Bio::EnsEMBL::Funcgen::Parsers::MAGE);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 =head2 new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 Example : my $self = $class->SUPER::new(@_);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 Description: Constructor method for Nimblegen class
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 Returntype : Bio::EnsEMBL::Funcgen::Parsers::Nimblegen
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 Exceptions : throws if Experiment name not defined or if caller is not Importer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 Caller : Bio::EnsEMBL::Funcgen::Importer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 Status : at risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Constructor. Delegates object creation to the parent class, refuses to
# continue unless the resulting object is an Importer, and then installs the
# NimbleGen specific configuration under $self->{'config'}.
#
# Arg [1..n] : passed through unchanged to SUPER::new
# Returns    : the constructed object
# Throws     : if the constructed object is not a Bio::EnsEMBL::Funcgen::Importer
sub new {
  my $caller = shift;

  my $class = ref($caller) || $caller;
  my $self  = $class->SUPER::new(@_);

  throw("This is a skeleton class for Bio::EnsEMBL::Importer, should not be used directly") if(! $self->isa("Bio::EnsEMBL::Funcgen::Importer"));

  #should we provide args override for all of these?

  $self->{'config'} =
    {
     #order of these data arrays is important!
     #Remove these method arrays, and just run them serially?
     array_data        => ['experiment'],#Rename this!!
     probe_data        => ["probe"],
     results_data      => ["and_import_results"],
     sample_key_fields => ['DESIGN_ID', 'CHIP_ID', 'DYE', 'PROMOT_SAMPLE_TYPE'],# 'SAMPLE_LABEL'],label now optional
     # 'SAMPLE_DESCRIPTION removed due to naming disparities
     ndf_fields        => ['CONTAINER', 'PROBE_SEQUENCE', 'MISMATCH','FEATURE_ID', 'PROBE_ID'],#MISMATCH is always 0!
     pos_fields        => ['CHROMOSOME', 'PROBE_ID', 'POSITION', 'COUNT'],
     result_fields     => ['PROBE_ID', 'PM', 'X', 'Y'],
     notes_fields      => ['DESIGN_ID', 'DESIGN_NAME', 'DESCRIPTION'],
     norm_method       => 'VSN_GLOG',
     dye_freqs         => {
                           Cy5 => 635,
                           Cy3 => 532,
                          },

     #Need to make these definable?
     #have protocolfile arg and just parse tab2mage protocol section format
     #NOTE: every protocol now uses the same 'parameters' key. Previously only
     #'hybridization' did; the others were misspelt ('paramters'/'paramteres').
     #All values are undef, so this only normalises the key names.
     protocols => {
                   grow => {
                            accession  => 'GROW_NIMB',
                            name       => 'GROW NIMBLEGEN CULTURE CONDITIONS',
                            text       => 'Nimblegen culture conditions description here. Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
                            parameters => undef,
                           },
                   treatment => {
                                 accession  => 'CROSSLINK_NIMB',
                                 name       => 'NIMBLEGEN CHROMATIN PREPARATION',
                                 text       => 'Nimblegen X-linking and DNA extraction protocol.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
                                 parameters => undef,
                                },
                   extraction => {
                                  accession  => 'CHROMATIN_IP_NIMB',
                                  name       => 'NIMBLEGEN CHROMATIN IMMUNOPRECIPITATION and DNA RECOVERY',
                                  text       => 'Nimblegen chromatin immunoprecipitation and DNA extraction protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
                                  parameters => undef,
                                 },
                   labeling => {
                                accession  => 'LABELLING_NIMB',
                                name       => 'NIMBLEGEN LABELLING',
                                text       => 'Nimblegen labelling protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
                                parameters => undef,
                               },
                   hybridization => {
                                     accession  => 'HYBRIDISATION_NIMB',
                                     name       => 'NIMBLEGEN HYBRIDISATION',
                                     text       => 'Nimblegen chip hybridisation protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
                                     parameters => undef,
                                    },
                   scanning => {
                                accession  => 'SCANNING_NIMB',
                                name       => 'NIMBLESCAN',
                                text       => 'Nimblegen Nimblescan protocol here.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.Padding text here to avoid description too short warnings.',
                                parameters => undef,
                               },
                  },
    };

  return $self;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 =head2 set_config
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 Example : $self->set_config;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 Description: Sets attribute dependent config
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 Returntype : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 Exceptions : None
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 Caller : Bio::EnsEMBL::Funcgen::Importer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 Status : at risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Derives the input/output paths for an import from attributes already set
# on the object. Directory attributes are stored directly on $self (so they
# stay reachable through get_dir), file paths are stored under
# $self->{'config'}. The 'old_dvd_format' flag selects the directory layout.
sub set_config {
  my ($self) = @_;

  #This should be general config for all types of import
  #dirs are not set in config to enable generic get_dir method access
  #This is really just setting paths rather than config rename?

  #Design files: the sub directory name depends on the DVD layout
  my $design_subdir = $self->{'old_dvd_format'} ? '/DesignFiles' : '/Design_information';
  $self->{'design_dir'} = $self->get_dir('input').$design_subdir;

  #DesignNotes.txt sits at the input root on old DVDs, else in the design dir
  my $notes_dir_type = $self->{'old_dvd_format'} ? 'input' : 'design';
  $self->{'config'}{'notes_file'} = $self->get_dir($notes_dir_type).'/DesignNotes.txt';

  $self->{'config'}{'chip_file'} = $self->get_dir('input').'/SampleKey.txt';

  #Experiment(output) specific
  #This should already be set in the run script
  #As we could get log write errors before we have created the output dir otherwise
  if(! $self->{'output_dir'}){
    my $data_dir = $self->get_dir("data");
    $self->{'output_dir'} = $data_dir.'/output/'.$self->{'param_species'}.'/'.$self->vendor().'/'.$self->name();
  }

  my $tab2mage_path = $self->get_dir('output').'/E-TABM-'.$self->name().'.txt';
  $self->{'config'}{'tab2mage_file'} = $tab2mage_path;

  my $mage_xml_path = $self->get_dir('output').'/{UNASSIGNED}.xml';
  $self->{'config'}{'mage_xml_file'} = $mage_xml_path;

  #Raw results: again the sub directory name depends on the DVD layout
  my $results_subdir = $self->{'old_dvd_format'} ? '/PairData' : '/Raw_data_files';
  $self->{'results_dir'} = $self->get_dir('input').$results_subdir;

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 =head2 read_array_data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 Example : $imp->read_array_data();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 Description: Parses NimbleGen DesignNotes.txt files to create and store new Arrays
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 Exceptions : Throws if more than one design is found and array_set is not set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 Caller : general
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 Status : At risk - Can this be generic? Can we force the creation of a DesignNotes file on other formats?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Parses a tab-delimited NimbleGen DesignNotes file, creating and storing one
# Array plus an ArrayChip for each distinct design id, then registers the
# Array on the importer via add_Array.
#
# Arg [1]  : optional path to the notes file; defaults to the 'notes_file'
#            config value
# Returns  : nothing
# Throws   : if a second design is encountered and array_set is not set
#            if the file contains no design records at all
sub read_array_data {
  my ($self, $notes_file) = @_;

  $notes_file ||= $self->get_config('notes_file');
  my ($array, %hpos);
  my $oa_adaptor = $self->db->get_ArrayAdaptor();
  my $ac_adaptor = $self->db->get_ArrayChipAdaptor();

  #Creates, stores and attaches an ArrayChip to the current $array.
  #Storing will use a stored array_chip if present
  my $add_array_chip = sub {
    my ($design_name, $design_id) = @_;

    my $array_chip = Bio::EnsEMBL::Funcgen::ArrayChip->new(
                                                          -ARRAY_ID  => $array->dbID(),
                                                          -NAME      => $design_name,
                                                          -DESIGN_ID => $design_id,
                                                          #add description?
                                                         );

    ($array_chip) = @{$ac_adaptor->store($array_chip)};
    $array->add_ArrayChip($array_chip);
    return;
  };

  my $fh = open_file($notes_file);

  #Would be better if we only import the design info for the chips listed in the SampleKey.txt file
  #Some cyclical dependency going on here :|

  while (my $line = <$fh>){
    $line =~ s/\r*\n//;#strip line ending, including DOS style
    my @data = split /\t/, $line;

    #We need to have a DESIGN vendor type?
    #also need to be able to set file path independently of config

    #First line read from the file is the header; map field names to columns
    if($. == 1){
      %hpos = %{$self->set_header_hash(\@data, $self->get_config('notes_fields'))};
      next;
    }

    #Blank (e.g. trailing) lines carry no design record. Treating them as data
    #would yield an undef design id and a bogus chip or a spurious throw below.
    next if $line !~ /\S/;

    my $design_name = $data[$hpos{'DESIGN_NAME'}];
    my $design_id   = $data[$hpos{'DESIGN_ID'}];

    ### CREATE AND STORE Array and ArrayChips
    if(! defined $array){
      #This is treating each array chip as a separate array, unless arrayset is defined
      #AT present we have no way of differentiating between different array_chips on same array???!!!
      #Need to add functionality afterwards to collate array_chips into single array

      #This will use a stored array if present
      $array = Bio::EnsEMBL::Funcgen::Array->new
        (
         -NAME        => $self->array_name() || $design_name,
         -FORMAT      => uc($self->format()),
         -VENDOR      => uc($self->vendor()),
         -TYPE        => 'OLIGO',
         -DESCRIPTION => $data[$hpos{'DESCRIPTION'}],#need to trim the array chip specific description here
        );

      ($array) = @{$oa_adaptor->store($array)};
      $add_array_chip->($design_name, $design_id);
    }
    elsif(! $array->get_ArrayChip_by_design_id($design_id)){
      #A design we have not seen yet is only valid as part of an array set

      if(! $self->array_set()){
        throw("Found experiment with more than one design without -array_set");
      }

      $self->log("Generating new ArrayChip(".$design_name.") for same Array:\t".$array->name()."\n");
      $add_array_chip->($design_name, $design_id);
    }
  }

  close($fh);

  #Fail with a useful message rather than handing undef to add_Array
  if(! defined $array){
    throw("No array design records found in notes file:\t".$notes_file);
  }

  $self->add_Array($array);

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 =head2 read_experiment_data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317 Example : $imp->read_experiment_data();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 Description: Parses and imports array & experimental chip meta data/objects
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 Exceptions : throws if more than one array/design found and not an "array set"
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 Caller : Importer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 Status : At risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326 sub read_experiment_data{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 my $self = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 $self->read_array_data();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 my $t2m_file = $self->init_tab2mage_export() if $self->{'write_mage'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 my ($design_desc, $line, $tmp_uid, $channel, $echip, $sample_label);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 my ($sample_desc, %hpos, @data, %uid_reps, %did_reps, %sample_reps);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 my $ec_adaptor = $self->db->get_ExperimentalChipAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 my $chan_adaptor = $self->db->get_ChannelAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 my $br_cnt = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 my $tr_cnt = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 #Currently 1 design = 1 chip = 1 array /DVD
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 #Different designs are not currently collated into a chip_set/array in any ordered manner
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 #Register each design as an array and an array_chip
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 #May want to group array_chips into array/chip sets by association though the API
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 warn("Harcoded for one array(can have multiple chips from the same array) per experiment\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 my $fh = open_file($self->get_config("chip_file"));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 $self->log("Reading chip data");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352 #warn "Do we need to validate each line here against the header array?";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354 while ($line = <$fh>){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 next if $line =~ /^\s+\r*\n/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 $line =~ s/\r*\n//;#chump
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 @data = split/\t/o, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358 #we could validate line against scalar of header array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 #ORD_ID CHIP_ID DYE DESIGN_NAME DESIGN_ID SAMPLE_LABEL SAMPLE_SPECIES SAMPLE_DESCRIPTION TISSUE_TREATMENT PROMOT_SAMPLE_TYPE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 if ($. == 1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 %hpos = %{$self->set_header_hash(\@data, $self->get_config('sample_key_fields'))};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363 #we need to set the sample description field name, as it can vary :(((
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364 @data = grep(/SAMPLE_DESCRIPTION/, keys %hpos);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365 $sample_desc = $data[0];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 throw("More than one sample description(@data) in ".$self->get_config("chip_file")."\n") if(scalar @data >1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 #Need to handle array class here i.e. two channel arrays will have two lines
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 #validate species here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373 #look up alias from registry and match to self->species
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 #registry may not be loaded for local installation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 ### CREATE AND STORE ExperimentalChips
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378 if ((! $tmp_uid) || ($data[$hpos{'CHIP_ID'}] ne $tmp_uid)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 #Test both channels are available, i.e. the SampleKey has two TOTAL channels
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 if($echip){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 for my $type('TOTAL', 'EXPERIMENTAL'){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 my $test_chan = $chan_adaptor->fetch_by_type_experimental_chip_id($type, $echip->dbID());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 throw("ExperimentalChip(".$echip->unique_id().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 ") does not have a $type channel, please check the SampleKey.txt file") if ! $test_chan;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 $tmp_uid = $data[$hpos{'CHIP_ID'}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 $echip = $ec_adaptor->fetch_by_unique_id_vendor($data[$hpos{'CHIP_ID'}], 'NIMBLEGEN');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 if($echip){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 if(! $self->recovery()){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 throw("ExperimentalChip(".$echip->unqiue_id().
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 " already exists in the database\nMaybe you want to recover?");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 }else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 $echip = Bio::EnsEMBL::Funcgen::ExperimentalChip->new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 -EXPERIMENT_ID => $self->experiment->dbID(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 -DESCRIPTION => $data[$hpos{$sample_desc}],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407 -FEATURE_TYPE => $self->feature_type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408 -CELL_TYPE => $self->cell_type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409 -ARRAY_CHIP_ID => $self->arrays->[0]->get_ArrayChip_by_design_id($data[$hpos{'DESIGN_ID'}])->dbID(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410 -UNIQUE_ID => $data[$hpos{'CHIP_ID'}],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411 #-BIOLOGICAL_REPLICATE => ,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 #-TECHNICAL_REPLICATE => ,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 ($echip) = @{$ec_adaptor->store($echip)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 $self->experiment->add_ExperimentalChip($echip);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421 ### CREATE AND STORE Channels
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422 my $type = uc($data[$hpos{'PROMOT_SAMPLE_TYPE'}]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423 my $sample_label = (! exists $hpos{'SAMPLE_LABEL'}) ? '' : $data[$hpos{'SAMPLE_LABEL'}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 $type = 'TOTAL' if ($type ne 'EXPERIMENTAL');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427 $channel = $chan_adaptor->fetch_by_type_experimental_chip_id($type, $echip->dbID());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 if($channel){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 if(! $self->recovery()){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 throw("Channel(".$echip->unqiue_id().":".uc($data[$hpos{'PROMOT_SAMPLE_TYPE'}]).
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 " already exists in the database\nMaybe you want to recover?");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433 }else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 #push @{$self->{'_rollback_ids'}}, $channel->dbID();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 #No point in doing this as all Channels may be pre-registered in recovery mode
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436 #Hence all will be rolled back
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 }else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439 #Handles single/multi
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 $channel = Bio::EnsEMBL::Funcgen::Channel->new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 -EXPERIMENTAL_CHIP_ID => $echip->dbID(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 -DYE => $data[$hpos{'DYE'}],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445 -SAMPLE_ID => $sample_label,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 -TYPE => $type,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 #-SPECIES => $self->species(),#on channel/sample to enable multi-species chip/experiment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 #would never happen on one chip? May happen between chips in one experiment
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 ($channel) = @{$chan_adaptor->store($channel)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 #we need to build the channel level tab2mage line here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 #For each BR there will be two sample_labels, one for each channel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 #These will be used across multiple chips.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 #If two chips have the same design ID and the same sample labels, then we have a technical replicate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 #else if they have different sample labels then we have another biological replicate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 #We have a problem of associating channels to the same BR with differing sample labels
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 #This is solved by checking whether the chip ID has already been registered in a BR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 #This fails if more than one sample label is used for any given BR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 #This will result in the BR being split into the number of unique sample label pairs(Experimental/Control channel)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467 #This also fails if the same sample label has been used for two different BRs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469 if($self->{'write_mage'}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470 #my $sample_name = ($sample_label eq '') ? '???' : substr($sample_label, 0, (length($sample_label)-1));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 my $ctype_name = (defined $self->cell_type()) ? $self->cell_type->name() : '???';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 my $ftype_name = (defined $self->feature_type()) ? $self->feature_type->name() : '???';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 my $ctype_desc = (defined $self->cell_type()) ? $self->cell_type->description() : '???';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 #define reps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478 #we need one to get the biorep based on the sample label and making sure the unique ID are the same
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 #we need to define the tech rep by matching the sample label and making sure the design_id isn't already used
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 #Is this doing the BR assignment properly?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 if(exists $sample_reps{$sample_label}){#Found chip in a previously seen BR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484 #Register the BR of this chip ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485 $uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'} = $sample_reps{$sample_label};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
488 elsif(exists $uid_reps{$data[$hpos{'CHIP_ID'}]}){#Found the other channel
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
489 $sample_reps{$sample_label} = $uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
490 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
491 else{#assign new br
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
492 $sample_reps{$sample_label} = $br_cnt; #Assign BR to sample label
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
493 $uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'} = $br_cnt; #Assign BR to chip id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
494 $br_cnt++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
495 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
496
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
497
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
498
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
499 #Something is going awry here. The TR is not being reset for some new BRs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
500
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
501 if(! exists $uid_reps{$data[$hpos{'CHIP_ID'}]}{'tr'}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
502 #we only assign a new tr here if this design has not been seen in any of the reps
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
503 #i.e. we need to get the first tr which does not contain this design_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
504
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
505 my $create_rep = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
506 my $tr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
507 my @chip_ids;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
508 my $br = $uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
509
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
510 foreach my $chip_id(keys %uid_reps){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
511
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
512 push @chip_ids, $chip_id if($uid_reps{$chip_id}{'br'} == $br);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
513 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
514
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
515 #This is looping through all the TRs for all the design IDs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
516
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
517 foreach my $rep(sort keys %did_reps){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
518 #Doesn't exist for the given BR?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
519 #So we need to get all the chip_ids for a given br
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
520 #Check whether it exists and whether it exists in did_reps and check whether the chip_id value is part of the BR set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
521 #else we add it
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
522
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
523 if(! exists $did_reps{$rep}{$data[$hpos{'DESIGN_ID'}]}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
524 #Not seen in a TR of this $rep yet
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
525 $create_rep = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
526 }elsif(! grep(/$did_reps{$rep}{$data[$hpos{'DESIGN_ID'}]}/, @chip_ids)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
527 #Not seen in this BR with this TR $rep
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
528 $create_rep = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
529 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
530
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
531 if(! $create_rep){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
532 #Design ID not seen so add to this TR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
533 $did_reps{$rep}{$data[$hpos{'DESIGN_ID'}]} = $data[$hpos{'CHIP_ID'}]; #don't really need to assign this
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
534 $tr = $rep;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
535 last;#do not remove this or we get wierd TR incrementing
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
536 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
537 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
538
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
539
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
540 if($create_rep){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
541 #Get the next TR value for this given BR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
542 my @trs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
543
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
544 foreach my $rep(keys %did_reps){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
545
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
546 foreach my $chip_id(values %{$did_reps{$rep}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
547
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
548 #Push TR if chip_id is present in this BR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
549 push @trs, $rep if(grep(/$chip_id/, @chip_ids));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
550 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
551 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
552 ($tr) = sort {$b<=>$a} @trs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
553
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
554 $tr ||=0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
555 $tr++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
556
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
557 #register design ID to chip ID mapping for this TR
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
558 $did_reps{$tr}{$data[$hpos{'DESIGN_ID'}]} = $data[$hpos{'CHIP_ID'}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
559 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
560
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
561 #register TR for this chip ID
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
562 $uid_reps{$data[$hpos{'CHIP_ID'}]}{'tr'} = $tr;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
563 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
564
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
565 my $br = $self->experiment->name().'_BR'. $uid_reps{$data[$hpos{'CHIP_ID'}]}{'br'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
566 my $tr = $br.'_TR'.$uid_reps{$data[$hpos{'CHIP_ID'}]}{'tr'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
567
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
568
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
569 #File[raw]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
570 my $tsm_line = $echip->unique_id().'_'.$self->get_config('dye_freqs')->{$data[$hpos{'DYE'}]}.'_pair.txt';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
571 #Array[accession] # Should this be left blank for AE accession?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
572 $tsm_line .= "\t".$data[$hpos{'DESIGN_ID'}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
573 #Array[serial]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
574 $tsm_line .= "\t".$echip->unique_id();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
575
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
576 #Protocol(s)[grow][treatment][extraction][labelling][hybridisation][scanning]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
577 foreach my $protocol(sort (keys %{$self->get_config('protocols')})){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
578 $tsm_line .= "\t".$self->get_config('protocols')->{$protocol}->{'accession'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
579 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
580
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
581
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
582 #BioSource
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
583 $tsm_line .= "\t$ctype_name";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
584 #Sample
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
585 $tsm_line .= "\t$br";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
586 #Extract
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
587 $tsm_line .= "\t$tr";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
588
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
589 #LabeledExtract & Immunoprecipitate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
590 if($type eq 'EXPERIMENTAL'){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
591 $tsm_line .= "\t$sample_label - IP of $tr with anti $ftype_name (Ab vendor, Ab ID)";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
592 $tsm_line .= "\t$tr IP";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
593 }else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
594 $tsm_line .= "\t$sample_label - Input control DNA of $tr\t";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
595 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
596
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
597 #Hybridization
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
598 #U2OS BR1_TR1 ChIP H3KAc 46092 hyb
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
599 $tsm_line .= "\t$ctype_name $tr ChIP $ftype_name ".$echip->unique_id().' hyb';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
600
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
601 #BioSourceMaterial SampleMaterial ExtractMaterial LabeledExtractMaterial
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
602 $tsm_line .= "\tcell\tgenomic_DNA\tgenomic_DNA\tsynthetic_DNA";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
603
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
604 #Dye
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
605 $tsm_line .= "\t".$data[$hpos{'DYE'}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
606
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
607 #BioMaterialCharacteristics[Organism]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
608 $tsm_line .= "\t".$self->species();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
609
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
610 #BioMaterialCharacteristics[BioSourceType]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
611 $tsm_line .= "\tfrozen_sample";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
612
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
613 #BioMaterialCharacteristics[StrainOrLine]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
614 $tsm_line .= "\t$ctype_name";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
615
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
616 #BioMaterialCharacteristics[CellType]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
617 $tsm_line .= "\t$ctype_name";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
618
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
619 #BioMaterialCharacteristics[Sex]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
620 $tsm_line .= "\t???";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
621 #FactorValue[StrainOrLine]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
622 $tsm_line .= "\t$ctype_name";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
623 #FactorValue[Immunoprecipitate]
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
624 $tsm_line .= ($type eq 'EXPERIMENTAL') ? "\tanti-${ftype_name} antibody\n" : "\t\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
625
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
626 print $t2m_file $tsm_line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
627
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
628 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
629
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
630 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
631
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
632 close($t2m_file) if $self->{'write_mage'};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
633 close($fh);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
634
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
635 return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
636 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
637
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
638
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
639
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
640
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
641 =head2 read_probe_data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
642
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
643 Example : $imp->read_probe_data();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
644 Description: Parses and imports probes, probe sets and features of a given array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
645 No duplicate handling or probe caching is performed due to memory
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
646 issues; this is done in resolve_probe_data.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
647 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
648 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
649 Caller : Importer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
650 Status : Medium
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
651
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
652 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
653
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
654
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
655 #Assumes one chip_design per experimental set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
656 sub read_probe_data{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
657 my ($self) = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
658
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
659 my ($fh, $line, @data, %hpos, %probe_pos);#, %duplicate_probes);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
660 $self->log("Parsing and importing ".$self->vendor()." probe data (".localtime().")", 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
661
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
662 ### Read in
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
663 #ndf file: probe_set, probe and probe_feature(.err contains multiple mappings)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
664 #pos file: probe chromosome locations
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
665
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
666 #Need to change how probe_names are generated for nimblegen?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
667 #native probe_ids may not be unique, but should be when combined with the seq_id which is currently being used as the xref_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
668 #Handle with API!!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
669
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
670 #READ REGION POSITIONS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
671 #We need to handle different coord systems and possibly different assemblies
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
672 my $slice_a = $self->db->get_SliceAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
673 my $cs = $self->db->get_FGCoordSystemAdaptor()->fetch_by_name('chromosome');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
674
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
675
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
676 #TIED FILE CACHE!!!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
677 #We need to rebuild the cache from the DB before we start adding new probe info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
678 #We only need to rebuild cache if we find a chip that hasn't been imported?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
679 #No, we just need to import without cache, then re-do the resolve step
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
680 #Are we still going to get disconnects when we dump the cache?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
681
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
682
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
683 #warn "Read probe data should only read in the array chips which are specified by the ExperimentalChip? Not just what is present in the DesignNotes file?";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
684
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
685
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
686 foreach my $array(@{$self->arrays()}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
687
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
688 foreach my $achip(@{$array->get_ArrayChips()}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
689
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
690 my (@log, %probe_pos, $fasta_file, $f_out);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
691 #do we need to fetch probe by seq and array?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
692 #this would also id non-unique seqs in design
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
693
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
694 #warn "We need to account for different cs feature mappings here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
695
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
696 if($achip->has_status('IMPORTED')){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
697 $self->log("Skipping fully imported ArrayChip:\t".$achip->design_id());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
698 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
699 }elsif($self->recovery()){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
700 $self->log("Rolling back ArrayChip:\t".$achip->design_id());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
701 $self->rollback_ArrayChips([$achip]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
702 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
703
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
704 $self->log("Importing ArrayChip:".$achip->design_id());
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
705
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
706 #Always use pos file, ndf file cannot be guaranteed to contain all location info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
707 #pos file also gives a key to which probes should be considered 'EXPERIMENTAL'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
708
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
709 #CACHE PROBE POSITIONS
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
710 $fh = open_file($self->get_dir("design")."/".$achip->name().".pos");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
711
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
712 #don't % = map ! Takes a lot longer than a while ;)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
713 while($line = <$fh>){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
714 $line =~ s/\r*\n//o;#Not using last element
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
715 @data = split/\t/o, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
716
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
717 #SEQ_ID CHROMOSOME PROBE_ID POSITION COUNT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
718 if ($. == 1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
719 %hpos = %{$self->set_header_hash(\@data, $self->get_config('pos_fields'))};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
720 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
721 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
722
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
723 #Skip probe if there is a duplicate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
724 if(exists $probe_pos{$data[$hpos{'PROBE_ID'}]}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
725
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
726 if($data[$hpos{'CHROMOSOME'}] eq $probe_pos{$data[$hpos{'PROBE_ID'}]}->{chr} &&
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
727 ($data[$hpos{'POSITION'}]+1) eq $probe_pos{$data[$hpos{'PROBE_ID'}]}->{start}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
728 #log or warn here?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
729
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
730 #Not matching probe length here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
731
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
732 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
733 #Do we need to skip this in the ndf file too?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
734
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
735 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
736 else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
737 throw("Found duplicate mapping for ".$data[$hpos{'PROBE_ID'}].
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
738 " need implement duplicate logging/cleaning");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
739 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
740 #need to build duplicate hash to clean elements from hash
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
741 # $duplicate_probes{$data[$hpos{'PROBE_ID'}]} = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
742 #next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
743 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
744
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
745
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
746 my $random = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
747
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
748 if(! $self->cache_slice($data[$hpos{'CHROMOSOME'}])){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
749 push @log, "Skipping feature import for probe ".$data[$hpos{'PROBE_ID'}]." with non-standard region ".$data[$hpos{'CHROMOSOME'}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
750
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
751 #should we try and resolve the random chrs here?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
752 #at least store probe/set/result and skip feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
753 #can we just import as chr with no start position?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
754
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
755 #if ($data[$hpos{'CHROMOSOME'}] =~ /_random/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
756
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
757 # if(! $self->cache_slice($data[$hpos{'CHROMOSOME'}])){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
758 #push @log, "Skipping probe ".$data[$hpos{'PROBE_ID'}]." with non-standard region ".$data[$hpos{'CHROMOSOME'}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
759 #}else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
760 #we should really log this in a separate file to avoid overloading the log file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
761 # push @log, "Importing random probe ".$data[$hpos{'PROBE_ID'}]." on ".$data[$hpos{'CHROMOSOME'}]." omitting position";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
762
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
763 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
764
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
765 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
766
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
767 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
768
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
769 #This is not handling probes with random chrs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
770
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
771 $probe_pos{$data[$hpos{'PROBE_ID'}]} = {(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
772 chr => $data[$hpos{'CHROMOSOME'}],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
773 start => ($data[$hpos{'POSITION'}] +1),#default UCSC->Ensembl coord conversion
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
774 )};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
775
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
776 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
777
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
778
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
779
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
780
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
781 #Remove duplicate probes
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
782
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
783 $self->log("Built position cache from : ".$achip->name().".pos", 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
784 close($fh);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
785
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
786 $self->log("Importing design probes from : ".$achip->name().".ndf");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
787 #OPEN PROBE IN/OUT FILES
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
788 $fh = open_file($self->get_dir("design")."/".$achip->name().".ndf");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
789 #Need to set these paths in each achip hash, file names could be tablename.chip_id.txt
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
790
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
791 #Need to add dbname/port/species/vendor to this path?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
792 #.efg DropDatabase should also clean the fasta dumps and caches for a given DB
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
793
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
794 if($self->dump_fasta()){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
795 $fasta_file = $self->get_dir('fastas').'/'.$achip->name().".fasta";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
796 $self->backup_file($fasta_file);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
797 $f_out = open_file($fasta_file, '>');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
798 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
799
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
800
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
801 my ($length, $ops, $op, $of, %pfs);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
802
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
803 #should define mapping_method arg to allows this to be set to LiftOver/EnsemblMap
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
804 my $anal = $self->db->get_AnalysisAdaptor()->fetch_by_logic_name("VendorMap");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
805
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
806
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
807 my $strand = 0; #default for nimblegen, should be config hash?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
808
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
809 #my $cig_line = "50M"; #default for nimblegen, should be config hash?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
810 #probe length can change within design, should be built from length
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
811
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
812
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
813 my $fasta = "";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
814
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
815 #$self->Timer()->mark("Starting probe loop");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
816
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
817
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
818 #This is leaking about 30-60MB for each normal density chip?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
819 #need Devel::Monitor here?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
820
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
821
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
822 while($line = <$fh>){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
823 $line =~ s/\r*\n//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
824 @data = split/\t/o, $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
825 my $loc = "";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
826 my $class = "EXPERIMENTAL";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
827
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
828 #PROBE_DESIGN_ID CONTAINER DESIGN_NOTE SELECTION_CRITERIA SEQ_ID PROBE_SEQUENCE MISMATCH MATCH_INDEX FEATURE_ID ROW_NUM COL_NUM PROBE_CLASS PROBE_ID POSITION DESIGN_ID X Y
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
829 #2067_0025_0001 BLOCK1 0 chrX TTAGTTTAAAATAAACAAAAAGATACTCTCTGGTTATTAAATCAATTTCT 0 52822449 52822449 1 25 experimental chrXP10404896 10404896 2067 25 1
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
830
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
831 if ($. == 1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
832 %hpos = %{$self->set_header_hash(\@data, $self->get_config('ndf_fields'))};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
833 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
834 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
835
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
836
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
837 if (! exists $probe_pos{$data[$hpos{'PROBE_ID'}]}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
838 push @log, "Skipping non-experimental probe:\t".$data[$hpos{'PROBE_ID'}];
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
839 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
840 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
841
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
842 #Which non-experimental probes might we want to store?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
843 #if($data[$hpos{'CONTAINER'}] =~ /control/io){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
844 # $class = "CONTROL";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
845 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
846 #elsif($data[$hpos{'CONTAINER'}] =~ /random/io){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
847 # $class = "RANDOM";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
848 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
849 #elsif($data[$hpos{'PROBE_CLASS'}] !~ /experimental/io){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
850 # $class = "OTHER";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
851 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
852 #elsif(! exists $probe_pos{$data[$hpos{'PROBE_ID'}]}){ #HACKY HACKY HACK HACK!! Needed for valid region retrival
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
853 # $class = "OTHER";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
854 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
855 #SPIKE INS?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
856
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
857
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
858
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
859 #This assumes all probes in feature/probeset are next to each other!!!!!!!!!!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
860
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
861
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
862 if($data[$hpos{'FEATURE_ID'}] != $data[$hpos{'MATCH_INDEX'}]){#Probe set data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
863 #print "Generating new probeset:\tFeature id:\t".$data[$hpos{'FEATURE_ID'}]."\tmatchindex:\t".$data[$hpos{'MATCH_INDEX'}]."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
864
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
865 if($ops && ($data[$hpos{'FEATURE_ID'}] ne $ops->name())){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
866 #THis is where we chose to update/validate
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
867 #Do we need to pass probes if they're already stored..may as well to reduce mysql load?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
868 #No point as we have to query anyway
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
869 $self->store_set_probes_features($achip->dbID(), \%pfs, $ops);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
870 throw("ops still defined in caller") if defined $ops;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
871 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
872
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
873 $ops = Bio::EnsEMBL::Funcgen::ProbeSet->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
874 -NAME => $data[$hpos{'FEATURE_ID'}],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
875 -SIZE => undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
876 -FAMILY => $data[$hpos{'CONTAINER'}],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
877 #xref_id => $data[$hpos{'SEQ_ID'}],#Need to populate xref table
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
878 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
879
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
880 #should we store straight away or build a probeset/probe/feature set, and then store and validate in turn?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
881 #Store directly have separate method to validate and update?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
882 #would need to check if one exists before storing anyway, else we could potentially duplicate the same probe/probeset from a different array
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
883 #remember for affy we need duplicate probe records with identical probe ids, probeset records unique across all arrays
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
884
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
885 undef %pfs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
886 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
887 elsif($. > 2){#may have previous ops set, but next has no ops, or maybe just no ops's at all
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
888 $self->store_set_probes_features($achip->dbID(), \%pfs, $ops);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
889 throw("ops still defined in caller") if defined $ops;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
890 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
891
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
892
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
893 ###PROBES
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
894 #should we cat $xref_id to $probe_id here to generate unique id?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
895 #would be messy to handle in the code, but would have to somewhere(in the retrieval code)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
896
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
897 $length = length($data[$hpos{'PROBE_SEQUENCE'}]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
898 #$probe_string .= "\t${psid}\t".$data[$hpos{'PROBE_ID'}]."\t${length}\t$ac_id\t${class}\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
899
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
900 $op = Bio::EnsEMBL::Funcgen::Probe->new(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
901 -NAME => $data[$hpos{'PROBE_ID'}],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
902 -LENGTH => $length,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
903 -ARRAY => $array,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
904 -ARRAY_CHIP_ID => $achip->dbID(),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
905 -CLASS => $class,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
906 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
907
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
908
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
909 %{$pfs{$data[$hpos{'PROBE_ID'}]}} = (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
910 probe => $op,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
911 features => [],
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
912 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
913
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
914
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
915 ###PROBE FEATURES
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
916 #How can we be certain that we have the same mapping in the DB?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
917 #Put checks in here for build?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
918 #Need to handle controls/randoms here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
919 #won't have features but will have results!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
920 #The format of the pos file looks like it should have all the data required, but
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
921 # chromosome is missing, first undef :(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
922 #Need to use $pos here instead of .pos file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
923 #However have problems defining probe class, as not populated in test set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
924 #derive from container! :(
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
925 #Ignore controls/random as they won't have a region
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
926 #Also need to handle multiple mappings?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
927 #As results reference probes, no features, can have multiple features on different builds
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
928
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
929 #if($class eq "EXPERIMENTAL"){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
930
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
931 #if(exists $regions{$data[$hpos{'SEQ_ID'}]}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
932 # $fid++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
933 # $pf_string .= "\t".$regions{$data[$hpos{'SEQ_ID'}]}{'seq_region_id'}."\t".$data[$hpos{'POSITION'}]."\t".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
934 # ($data[$hpos{'POSITION'}] + $length)."\t${strand}\t".$regions{$data[$hpos{'SEQ_ID'}]}{'coord_system_id'}.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
935 # "\t${pid}\t${anal_id}\t".$data[$hpos{'MISMATCH'}]."\t${cig_line}\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
936 #$loc .= $regions{$data[$hpos{'SEQ_ID'}]}{'seq_region_id'}.":".$data[$hpos{'POSITION'}].
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
937 # "-".($data[$hpos{'POSITION'}] + $length).";" if ($self->{'_dump_fasta'});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
938 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
939 # else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
940 # die("No regions defined for ".$data[$hpos{'SEQ_ID'}]." ".$data[$hpos{'PROBE_ID'}].
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
941 #" with family ".$data[$hpos{'CONTAINER'}]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
942 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
943
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
944
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
945
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
946
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
947 if ($self->dump_fasta()){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
948 #(my $chr = $probe_pos{$data[$hpos{'PROBE_ID'}]}->{'chr'}) =~ s/chr//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
949
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
950 #$loc .= $chr.":".$probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'}."-".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
951 # ($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'}+ $length).";";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
952
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
953 #$fasta .= ">".$data[$hpos{'PROBE_ID'}]."\t".$data[$hpos{'CHROMOSOME'}].
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
954 # "\t$loc\n".$data[$hpos{'PROBE_SEQUENCE'}]."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
955
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
956
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
957
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
958 #filter controls/randoms? Or would it be sensible to see where they map
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
959 #wrap seq here?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
960 #$fasta .= ">".$data[$hpos{'PROBE_ID'}]."\n".$data[$hpos{'PROBE_SEQUENCE'}]."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
961
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
962
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
963 #To use this for mapping, we really need the dbID nr fasta
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
964 #This can be generated after the import, or maybe during resolve?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
965 #This is also currently done on a chip level, where as the cache is resolved at the array level
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
966 #We could simply cat the files before resolving the fasta file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
967 #Need to do this otherwise we risk overwriting the fasta file with incomplete data.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
968 #Can we validate sequence across probes with same name in this step?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
969 #Just use probe name for now.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
970
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
971 #We could cat and sort the fastas to make sure we have the same sequences
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
972 #Need to dump the design_id in the fasta header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
973 #This would also reduce IO on the DB as identical probe will be consecutive, hence just one query to get the id.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
974
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
975 #Changed the format and content of this to facilitate dbID nr fasta file generation and sequence validation
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
976
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
977 $fasta .= ">".$data[$hpos{'PROBE_ID'}]."\t".$achip->design_id."\n".$data[$hpos{'PROBE_SEQUENCE'}]."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
978
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
979 #Print fasta every 10000 lines
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
980 if(! ($. % 10000)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
981 print $f_out $fasta;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
982 $fasta = '';
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
983 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
984 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
985
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
986
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
987 #Hack!!!!!! Still importing probe (and result?)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
988 next if(! $self->cache_slice($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'chr'}));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
989 #warn("Skipping non standard probe (".$data[$hpos{'PROBE_ID'}].") with location:\t$loc\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
990
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
991
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
992 $of = Bio::EnsEMBL::Funcgen::ProbeFeature->new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
993 (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
994 -START => $probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
995 -END =>($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'start'} + $length),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
996 -STRAND => $strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
997 -SLICE => $self->cache_slice($probe_pos{$data[$hpos{'PROBE_ID'}]}->{'chr'}),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
998 -ANALYSIS => $anal,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
999 -MISMATCHCOUNT => $data[$hpos{'MISMATCH'}],#Is this always 0 for import? remove from header hash?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1000 -PROBE => undef, #Need to update this in the store method
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1001 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1002
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1003
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1004 push @{$pfs{$data[$hpos{'PROBE_ID'}]}{'features'}}, $of;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1005
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1006 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1007
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1008 #need to store last data here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1009 $self->store_set_probes_features($achip->dbID(), \%pfs, $ops);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1010 $self->log(join("\n", @log));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1011 $achip->adaptor->store_status("IMPORTED", $achip);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1012 $self->log("ArrayChip:\t".$achip->design_id()." has been IMPORTED");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1013
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1014 if ($self->dump_fasta()){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1015 print $f_out $fasta;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1016 close($f_out);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1017 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1018
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1019 $self->log("Imported design from:\t".$achip->name().".ndf", 1);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1020
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1021
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1022
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1023 #$self->{'_probe_cache'} = undef;#As we can't get X and Y info from the DB, this is only possible as the results files contain X and Y info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1024 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1025
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1026 #Should we build hash of probe_names:probe_feature_ids here for results import
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1027 #Should we dump this as a lookup file for easier recoverability
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1028 #This is the biggest step in the import
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1029 #Building the hash would be fastest but least recoverable
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1030 #Would have to write recover statements for each step i.e. build the most recent data structure required for the next import step
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1031 #Individual queries for each result would take ages
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1032 #This is all assuming there are no random records in the table i.e. ID series is linear with no gaps.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1033
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1034
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1035 #Could throw a random validation check in every X entries?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1036 #This would only work for non-parallel imports
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1037 #periodic import when hit new probe_set, but no new data printed
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1038
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1039 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1040
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1041
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1042 $self->log("Finished parsing probe data");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1043 #Total probe_sets:\t$psid\n".
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1044 # "Total probes:\t$pid\nTotal probe_features:\t$fid");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1045
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1046
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1047 $self->resolve_probe_data();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1048
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1049 return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1050 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1051
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1052
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1053
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1054
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1055 =head2 read_and_import_results_data
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1056
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1057 Example : $imp->read_and_import_results_data();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1058 Description: Parses and dumps raw results to file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1059 Returntype : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1060 Exceptions : none
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1061 Caller : Importer
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1062 Status : at risk
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1063
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1064 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1065
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1066
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
sub read_and_import_results_data{
  my $self = shift;

  # For every Channel of the experiment that does not yet have the IMPORTED
  # status: locate its result file, rewrite the probe rows as tab-delimited
  # records in the "raw" directory, load that file into the result table and
  # mark the Channel as IMPORTED.
  #
  # Returns nothing. Throws if the probe cache handle is unavailable, if no
  # result file exists for a channel, if the result file header lacks one of
  # the columns used below, or if the dump file cannot be closed cleanly.

  $self->log("Parsing ".$self->vendor()." results");
  my $anal       = $self->db->get_AnalysisAdaptor->fetch_by_logic_name("RawValue");
  my $result_set = $self->get_import_ResultSet($anal, 'channel');

  if ($result_set) { #we have some new data to import

    foreach my $echip (@{$self->experiment->get_ExperimentalChips()}) {

      foreach my $chan (@{$echip->get_Channels()}) {
        next if $chan->has_status('IMPORTED');

        #we need to set write_mage here

        my $array = $echip->get_ArrayChip->get_Array();
        $self->get_probe_cache_by_Array($array) || throw('Failed to get the probe cache handle for results import, resolve cache here?');

        my $chan_name = $echip->unique_id()."_".$self->get_config('dye_freqs')->{$chan->dye()};
        my $cc_id     = $result_set->get_chip_channel_id($chan->dbID());

        #Locate the input file first, so that a missing input does not
        #leave a backed up and truncated output file behind
        (my $alt_chan_name = $chan_name) =~ s/\_/\_1h\_/;
        my $results_dir = $self->get_dir("results");
        my $file;

      FILE: foreach my $name ($chan_name, $alt_chan_name) {

          foreach my $suffix ("_pair.txt", ".pair", ".txt") {
            my $candidate = $results_dir."/".$name.$suffix;

            if (-f $candidate) {
              $file = $candidate;
              last FILE;
            }
          }
        }

        throw("Could not find result file for Channel(${chan_name}) in ".$results_dir) if ! defined $file;

        #open/slurp input
        $self->log("Reading result for channel $chan_name:\t$file", 1);
        my $fh    = open_file($file);
        my @lines = <$fh>;
        close($fh);


        ###PROCESS HEADER
        #The header is the first line mentioning PROBE_ID, it is removed
        #from @lines so only data/comment lines remain
        my %hpos;

        foreach my $i (0..$#lines) {
          next if $lines[$i] !~ /PROBE_ID/;

          (my $header = $lines[$i]) =~ s/\r*\n//;
          my @fields = split /\t/, $header;
          %hpos      = %{$self->set_header_hash(\@fields, $self->get_config('result_fields'))};
          splice @lines, $i, 1;
          last; #finished processing header
        }

        #Without these columns every field lookup below would silently
        #use an undefined index, so fail loudly instead
        foreach my $field (qw(PROBE_ID PM X Y)) {
          throw("Could not find mandatory field ${field} in the header of result file:\t$file") if ! exists $hpos{$field};
        }

        my ($probe_idx, $pm_idx, $x_idx, $y_idx) = @hpos{qw(PROBE_ID PM X Y)};


        ###FILTER AND SORT
        #Discard comments, control/random probes and blank lines before
        #sorting, and extract each sort key once rather than re-splitting
        #both lines on every comparison. The key is taken from the raw
        #line exactly as before, so the resulting order is unchanged.
        my (@records, @sort_keys);

        foreach my $line (@lines) {
          next if $line =~ /^#/;
          next if $line =~ /NGS_CONTROLS|V_CODE|H_CODE|RANDOM/;
          next if $line !~ /\S/; #blank lines carry no probe name to look up

          my $key = (split /\t/, $line)[$probe_idx];
          push @records, $line;
          push @sort_keys, (defined $key ? $key : '');
        }

        @lines = (); #release the slurped copy

        #we need to sort the result files based on the unique key(name at present, should replace with seq at some point)
        my @order = sort { $sort_keys[$a] cmp $sort_keys[$b] } 0..$#records;


        #open/backup output
        my $out_file = $self->get_dir("raw")."/result.".$chan_name.".txt";
        $self->backup_file($out_file);
        my $r_out = open_file($out_file, '>');

        $self->log('Parsing results', 1);

        my $cnt      = 0;
        my $r_string = "";

        foreach my $idx (@order) {
          (my $line = $records[$idx]) =~ s/\r*\n//;
          my @data = split /\t/, $line;

          ###PROCESS DATA
          if (my $pid = $self->get_probe_id_by_name_Array($data[$probe_idx], $array)) {
            $cnt ++;
            #'\N' is written literally as the first column
            #NOTE(review): presumably the NULL marker for an auto-increment id on load - confirm
            $r_string .= join("\t", '\N', $pid, $data[$pm_idx], $cc_id, $data[$x_idx], $data[$y_idx])."\n";
          } else {
            warn "Found unfiltered non-experimental probe in input $data[$probe_idx]";
          }

          ###PRINT SOME RESULTS
          #Flush the buffer periodically to bound memory use
          if ($cnt > 10000) {
            $cnt = 0;
            print $r_out $r_string;
            $r_string = "";
            #could we fork here and import in the background?
          }
        }

        #PRINT/CLOSE Channel file
        print $r_out $r_string;
        #Buffered write errors only surface at close, do not import a truncated file
        close($r_out) || throw("Failed to close result dump file $out_file:\t$!");
        $self->log("Finished parsing $chan_name result", 1);

        #Import directly here to avoid having to reparse all results if we crash!!!!
        $self->log("Importing:\t$out_file");
        $self->db->load_table_data("result", $out_file);
        $self->log("Finished importing:\t$out_file", 1);
        $chan->adaptor->store_status('IMPORTED', $chan);
      }
    }
  }
  else {
    $self->log("Skipping results parse and import");
  }

  $self->log("Finished parsing and importing results");

  return;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1218
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1219
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1220
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1221 1;