comparison variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/biotiffin.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1f6dce3d34e0
1 =head1 LICENSE
2
3 Copyright (c) 1999-2011 The European Bioinformatics Institute and
4 Genome Research Limited. All rights reserved.
5
6 This software is distributed under a modified Apache license.
7 For license details, please see
8
9 http://www.ensembl.org/info/about/code_licence.html
10
11 =head1 CONTACT
12
13 Please email comments or questions to the public Ensembl
14 developers list at <ensembl-dev@ebi.ac.uk>.
15
16 Questions may also be sent to the Ensembl help desk at
17 <helpdesk@ensembl.org>.
18
19 =cut
20
21 package Bio::EnsEMBL::Funcgen::Parsers::biotiffin;
22
23 use strict;
24
25 use File::Basename;
26
27 # To get files for bioTIFFIN, download the following GFF file (e.g. via wget):
28 #
29 # http://td-blade.gurdon.cam.ac.uk/tad26/fly-tiffinScan-tiffin12.dm3.gff.gz
30
31 # Thomas Down <thomas.down@gurdon.cam.ac.uk>
32
33 #
34 # 3R MotifScanner TIFDMEM0000001 936391 936401 0.0 + 0
35 # 3R MotifScanner TIFDMEM0000001 13455911 13455921 0.0 - 0
36 # 3R MotifScanner TIFDMEM0000001 17062830 17062840 0.0 + 0
37
38 use Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser;
39 use Bio::EnsEMBL::DBEntry;
40 use Bio::EnsEMBL::Funcgen::ExternalFeature;
41 use Bio::EnsEMBL::Utils::Exception qw( throw );
42 use Bio::EnsEMBL::Utils::Argument qw( rearrange );
43
44 use vars qw(@ISA);
45 @ISA = qw(Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser);
46
47
48 sub new {
49 my $caller = shift;
50 my $class = ref($caller) || $caller;
51
52 my $self = $class->SUPER::new(@_, type => 'BioTiffin');
53
54 #Set default feature_type and feature_set config
55
56 #We need to capture version/release/data of external feature sets.
57 #This can be nested in the description? Need to add description to feature_set?
58
59 $self->{static_config}{feature_types} =
60 {
61 'BioTIFFIN Motif' => {
62 name => 'BioTIFFIN Motif',
63 class => 'Regulatory Motif',
64 description => 'BioTIFFIN motif',
65 }
66 };
67
68 $self->{static_config}{analyses} =
69 {
70 'BioTIFFIN Motif' => {
71 -logic_name => 'BioTIFFIN Motif',
72 -description => 'BioTIFFIN regulatory motif database',
73 -display_label => 'BioTIFFIN motifs',
74 -displayable => 1,
75 },
76 };
77
78 $self->{static_config}{feature_sets} =
79 {
80 'BioTIFFIN Motif' =>
81 {
82 feature_set => {
83 -feature_type => 'BioTIFFIN Motif',
84 -analysis => 'BioTIFFIN Motif',
85 },
86 xrefs => 0,
87 }
88 };
89
90
91 #Move xref flag here?
92 $self->{config} = {
93 'BioTIFFIN Motif' => {
94 file => $ENV{'EFG_DATA'}.'/input/BioTIFFIN/fly-tiffinScan-tiffin12.dm3.gff',
95 gff_attrs => {
96 'ID' => 1,
97 },
98 },
99 };
100
101 $self->validate_and_store_config([keys %{$self->{static_config}{feature_sets}}]);
102 $self->set_feature_sets;
103
104 return $self;
105 }
106
107
108
109 # Parse file and return hashref containing:
110 #
111 # - arrayref of features
112 # - arrayref of factors
113
114
115
116
117 sub parse_and_load {
118 my ($self, $files, $old_assembly, $new_assembly) = @_;
119
120 if(scalar(@$files) != 1){
121 throw('You must provide a unique file path to load VISTA features from:\t'.join(' ', @$files));
122 }
123
124
125 my %slice_cache;
126 my $extf_adaptor = $self->db->get_ExternalFeatureAdaptor;
127 my $dbentry_adaptor = $self->db->get_DBEntryAdaptor;
128 my $ftype_adaptor = $self->db->get_FeatureTypeAdaptor;
129 # this object is only used for projection
130 my $dummy_analysis = new Bio::EnsEMBL::Analysis(-logic_name => 'BioTIFFINProjection');#do we need this?
131 my $species = $self->db->species;
132 if(! $species){
133 throw('Must define a species to define the external_db');
134 }
135 #Just to make sure we hav homo_sapiens and not Homo Sapiens
136 ($species = lc($species)) =~ s/ /_/;
137
138
139 if(scalar @{$self->import_sets} != 1){
140 throw('biotiffin parser currently only supports one import FeatureSet');
141 }
142
143 my ($import_set) = @{$self->import_sets};
144
145
146 #foreach my $import_set(@{$self->import_sets}){
147 $self->log_header("Parsing $import_set data");
148
149 my %motif_cache; # name -> factor_id
150 my $config = $self->{'config'}{$import_set};
151 my $fset = $self->{static_config}{feature_sets}{$import_set}{feature_set};
152 my %gff_attrs = %{$config->{'gff_attrs'}};
153
154
155 # Parse motifs.txt file
156 #my $file = $config->{'file'};
157 my $file = $files->[0];
158 my $skipped = 0;
159 my $motif_cnt = 0;
160 my $factor_xref_cnt = 0;
161 my $feature_cnt = 0;
162 my $feature_target_cnt = 0;
163
164 open (FILE, "<$file") || die("Can't open $file\n$!\n");
165
166 LINE: while (my $line = <FILE>) {
167 chomp $line;
168
169 #GFF3
170 #3R MotifScanner TIFDMEM0000001 936391 936401 0.0 + 0
171 #3R MotifScanner TIFDMEM0000001 13455911 13455921 0.0 - 0
172 #3R MotifScanner TIFDMEM0000001 17062830 17062840 0.0 + 0
173 #3R MotifScanner TIFDMEM0000001 17973965 17973975 0.0 + 0
174
175 #seq_name, source, feature, start, end, score, strand, frame, [attrs]
176 my ($chromosome, $program, $feature, $start, $end, $score, $strand, undef) = split /\t/o, $line;
177
178 if(! exists $slice_cache{$chromosome}){
179
180 if($old_assembly){
181 $slice_cache{$chromosome} = $self->slice_adaptor->fetch_by_region('chromosome',
182 $chromosome,
183 undef,
184 undef,
185 undef,
186 $old_assembly);
187 } else {
188 $slice_cache{$chromosome} = $self->slice_adaptor->fetch_by_region('chromosome', $chromosome);
189 }
190 }
191
192 if(! defined $slice_cache{$chromosome}){
193 warn "Can't get slice $chromosome for motif $feature;\n";
194 $skipped++;
195 next;
196 }
197
198 if(! exists $motif_cache{$feature}){
199
200 $motif_cache{$feature} = $ftype_adaptor->fetch_by_name($feature);
201
202 if(! defined $motif_cache{$feature}){
203
204 ($motif_cache{$feature}) = @{$ftype_adaptor->store(Bio::EnsEMBL::Funcgen::FeatureType->new
205 (
206 -name => $feature,
207 -class => $fset->feature_type->class,
208 -description => $fset->feature_type->description,
209 ))};
210
211 $motif_cnt ++;
212 }
213 }
214
215 my $feature_type = $motif_cache{$feature};
216
217 #Now build actual feature
218
219 $feature = Bio::EnsEMBL::Funcgen::ExternalFeature->new
220 (
221 -display_label => $feature,
222 -start => $start,
223 -end => $end,
224 -strand => (($strand eq '+') ? 1 : -1),
225 -feature_type => $feature_type,
226 -feature_set => $fset,
227 -slice => $slice_cache{$chromosome},
228 );
229
230
231 # project if necessary
232 if ($new_assembly) {
233 $feature = $self->project_feature($feature, $new_assembly);
234
235 if(! defined $feature){
236 $skipped ++;
237 next;
238 }
239 }
240
241 ($feature) = @{$extf_adaptor->store($feature)};
242 $feature_cnt++;
243
244
245 }
246
247 close FILE;
248
249 $self->log("Loaded ".$fset->name);
250 $self->log("$motif_cnt feature types");
251 $self->log("$feature_cnt features");
252 $self->log("Skipped $skipped features");
253
254 #}
255
256 return;
257 }
258
259 1;