annotate variant_effect_predictor/Bio/EnsEMBL/Funcgen/Parsers/cisred.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =head1 LICENSE
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 Copyright (c) 1999-2011 The European Bioinformatics Institute and
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4 Genome Research Limited. All rights reserved.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6 This software is distributed under a modified Apache license.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 For license details, please see
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 http://www.ensembl.org/info/about/code_licence.html
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =head1 CONTACT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13 Please email comments or questions to the public Ensembl
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 developers list at <ensembl-dev@ebi.ac.uk>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 Questions may also be sent to the Ensembl help desk at
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 <helpdesk@ensembl.org>.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 package Bio::EnsEMBL::Funcgen::Parsers::cisred;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 use File::Basename;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 # To get files for CisRed data, download the following 2 files (e.g. via wget):
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 # http://www.cisred.org/content/databases_methods/human_2/data_files/motifs.txt
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 # http://www.cisred.org/content/databases_methods/human_2/data_files/search_regions.txt
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 #No longer valid urls, now use the following for ensembl formats for all species:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 #http://www.bcgsc.ca/downloads/cisred/temp/cisRED4Ensembl/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 #naming may not be obvious; may have to cross-reference with the previous URLs above to get build info
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38 # Format of motifs.txt (note group_name often blank)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 #name chromosome start end strand group_name ensembl_gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41 #craHsap1 1 168129978 168129997 -1 1 ENSG00000000457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 #craHsap2 1 168129772 168129781 -1 2 ENSG00000000457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 #craHsap3 1 168129745 168129756 -1 3 ENSG00000000457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 #craHsap4 1 168129746 168129753 -1 4 ENSG00000000457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 #craHsap5 1 168129745 168129752 -1 5 ENSG00000000457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 #craHsap6 1 168129741 168129757 -1 6 ENSG00000000457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 # Format of search_regions.txt
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 # name chromosome start end strand ensembl_gene_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 # 1 17 39822200 39824467 -1 ENSG00000005961
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 # 8 17 23151483 23153621 -1 ENSG00000007171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 # 14 1 166434638 166437230 -1 ENSG00000007908
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 # 19 1 23602820 23605631 -1 ENSG00000007968
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 use Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 use Bio::EnsEMBL::DBEntry;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 use Bio::EnsEMBL::Funcgen::ExternalFeature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 use Bio::EnsEMBL::Utils::Exception qw( throw );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 use vars qw(@ISA);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 @ISA = qw(Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 sub new {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 my $caller = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 my $class = ref($caller) || $caller;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 my $self = $class->SUPER::new(@_, type => 'cisRED');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 #Set default feature_type and feature_set config
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 $self->{static_config}{feature_types} =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 'cisRED Search Region' => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 -name => 'cisRED Search Region',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 -class => 'Search Region',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 -description => 'cisRED search region',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 'cisRED Motif' => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 -name => 'cisRED Motif',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 -class => 'Regulatory Motif',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 -description => 'cisRED atomic motif',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 $self->{static_config}{analyses} =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 cisRED => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 -logic_name => 'cisRED',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 -description => 'cisRED motif search (www.cisred.org)',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 -display_label => 'cisRED',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 -displayable => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 $self->{static_config}{feature_sets} =
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 'cisRED search regions' =>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 analyses => $self->{static_config}{analyses},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 feature_types => $self->{static_config}{feature_types},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 feature_set => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 #feature_type and analysis here are the keys from above
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 -feature_type => 'cisRED Search Region',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 -display_label => 'cisRED searches',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 -analysis => 'cisRED',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 xrefs => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117 'cisRED motifs' =>
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 analyses => $self->{static_config}{analyses},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 feature_types => $self->{static_config}{feature_types},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 feature_set => {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 #feature_type and analysis here are the keys from above
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 -feature_type => 'cisRED Motif',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 -analysis => 'cisRED',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 xrefs => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 },
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 #$self->validate_and_store_feature_types;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 $self->validate_and_store_config([keys %{$self->{static_config}{feature_sets}}]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 $self->set_feature_sets;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 return $self;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 # Parse file and return hashref containing:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 # - arrayref of features
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 # - arrayref of factors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 #To do
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 # 1 This needs to take both file names, motifs, then search regions. Like the Bed/GFF importers do.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 sub parse_and_load {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152 my ($self, $files, $old_assembly, $new_assembly) = @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 $self->log_header("Parsing cisRED data");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 if(scalar(@$files) != 2){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 throw('You must currently define a motif and search file to load cisRED features from:\t'.join(' ', @$files));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 my $analysis_adaptor = $self->db->get_AnalysisAdaptor();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 #my %features_by_group; # name -> factor_id
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 my %groups;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 my %slice_cache;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 my $extf_adaptor = $self->db->get_ExternalFeatureAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 my $dbentry_adaptor = $self->db->get_DBEntryAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 my $ftype_adaptor = $self->db->get_FeatureTypeAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 #my $display_name_cache = $self->build_display_name_cache('gene');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 # this object is only used for projection
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 my $dummy_analysis = new Bio::EnsEMBL::Analysis(-logic_name => 'CisREDProjection');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171 # ----------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 # We need a "blank" factor for those features which aren't assigned factors
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 # Done this way to maintain referential integrity
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 #my $blank_factor_id = $self->get_blank_factor_id($db_adaptor);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 #More validation of files here?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 my ($motif_file) = grep(/motif/, @$files);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 my ($search_file) = grep(/search/, @$files);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 my $species = $self->db->species;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 if(! $species){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 throw('Must define a species to define the external_db');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 #Just to make sure we hav homo_sapiens and not Homo Sapiens
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 ($species = lc($species)) =~ s/ /_/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 # Parse motifs.txt file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 $self->log_header("Parsing cisRED motifs from $motif_file");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 my $skipped = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 my $skipped_xref = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 #my $coords_changed = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 my $cnt = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 my $set = $self->{static_config}{feature_sets}{'cisRED motifs'}{feature_set};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197 open (FILE, "<$motif_file") || die "Can't open $motif_file";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 <FILE>; # skip header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 while (<FILE>) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201 next if ($_ =~ /^\s*\#/o || $_ =~ /^\s*$/o);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 #name chromosome start end strand group_name ensembl_gene
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 #craHsap1 1 168129978 168129997 - crtHsap40066,crtHsap40060 ENSG00000000457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 #craHsap2 1 168129772 168129781 - crtHsap40068,crtHsap40193,crtHsap40130 ENSG00000000457
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 #So we only ever have one atomic motif, which may belong to several groups
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 #Do not store atmoic motifs as feature types as this is the actual feature
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 #simply use the feature_set->feature_type and store the atmoic motif id as the name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 my ($motif_name, $chromosome, $start, $end, $strand, $groups, $gene_id) = split/\t/o;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 #($gene_id) = $gene_id =~ /(ENS.*G\d{11})/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 my @group_names = split/,/, $groups;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 #These are stranded features, so either - or +, never 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 $strand = ($strand eq '-') ? -1 : 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 if(! exists $slice_cache{$chromosome}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 if($old_assembly){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 $slice_cache{$chromosome} = $self->slice_adaptor->fetch_by_region('chromosome',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224 $chromosome,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227 undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 $old_assembly);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 }else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 $slice_cache{$chromosome} = $self->slice_adaptor->fetch_by_region('chromosome', $chromosome);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
234 if(! defined $slice_cache{$chromosome}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
235 warn "Can't get slice $chromosome for motif $motif_name\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
236 $skipped++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
237 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
238 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
239
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
240
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
241 #get feature_type first
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
242
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
243 #we are not maintaining this link in the DB!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
244 #Do we need another xref for this or a different table?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
245
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
246
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
247 #if ($group_name && $group_name ne '' && $group_name !~ /\s/o) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
248 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
249 # if(! exists $features_by_group{$group_name}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
250 # $features_by_group{$group_name} = $ftype_adaptor->fetch_by_name('crtHsap'.$group_name);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
251 #
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
252 # if(! defined $features_by_group{$group_name}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
253 # ($features_by_group{$group_name}) = @{$ftype_adaptor->store(Bio::EnsEMBL::Funcgen::FeatureType->new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
254 # (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
255 # -name => 'crtHsap'.$group_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
256 # -class => 'Regulatory Motif',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
257 # -description => 'cisRED group',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
258 # ))};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
259 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
260 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
261 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
262 #}else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
263 # throw("Found cisRED feature $motif_name with no group_name, unable to defined feature_type");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
264 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
265
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
266 foreach my $group(@group_names){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268 next if exists $groups{$group};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
269
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
270 #else store the new group as a feature_type and set $group to be the feature_type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
271 ($group) = @{$ftype_adaptor->store(Bio::EnsEMBL::Funcgen::FeatureType->new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
272 (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273 -name => $group,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
274 -class => 'Regulatory Motif',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
275 -description => 'cisRED group',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
276 ))};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
277 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
279
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
280
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
281 #my $ftype = (defined $features_by_group{$group_name}) ? $features_by_group{$group_name} : $self->{'feature_sets'}{'cisRED group motifs'}->feature_type;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
282
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284 my $feature = Bio::EnsEMBL::Funcgen::ExternalFeature->new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286 -display_label => $motif_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
287 -start => $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
288 -end => $end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
289 -strand => $strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
290 #-feature_type => $ftype,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
291 -associated_feature_types => \@group_names,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
292 -feature_set => $set,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
293 -slice => $slice_cache{$chromosome},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
294 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
295
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
296
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
297
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
298 # project if necessary
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
299 if ($new_assembly) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
300 $feature = $self->project_feature($feature, $new_assembly);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
301
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
302 if(! defined $feature){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
303 $skipped ++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
304 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
305 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
306
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
307 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
308
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
309 ($feature) = @{$extf_adaptor->store($feature)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
310 $cnt++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
311
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
312
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
313
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
314 #We don't care so much about loading features for retired Genes here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
315 #as the Genes are only used to define the search regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
316 #Not a direct alignment as with the miRanda set
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
317
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
318 #However, adding an xref will create dead link in the browser
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
319
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
320 #Build Xref here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
321 if (! $gene_id) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
322 warn("No xref available for motif $motif_name\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
323 $skipped_xref++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
324 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
325 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
326
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
327 my $display_name = $self->get_core_display_name_by_stable_id($self->db->dnadb, $gene_id, 'gene');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
328
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
329 #Handle release/version in xref version as stable_id version?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
330
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
331 my $dbentry = Bio::EnsEMBL::DBEntry->new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
332 (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
333 -dbname => $species.'_core_Gene',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
334 #-release => $self->db->_get_schema_build($self->db->dnadb),
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
335 #-release => '49_36b',#harcoded for human
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
336 -release => '49_37b', #hardcoded for mouse
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
337 -status => 'KNOWNXREF',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
338 #-display_label_linkable => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
339 -db_display_name => 'EnsemblGene',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
340 -type => 'MISC',#this is external_db.type
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
341 -primary_id => $gene_id,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
342 -display_id => $display_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
343 -info_type => 'MISC',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
344 -info_text => 'GENE',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
345 -linkage_annotation => 'cisRED motif gene',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
346 -analysis => $set->analysis,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
347 #could have version here if we use the correct dnadb to build the cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
348 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
349 $dbentry_adaptor->store($dbentry, $feature->dbID, 'ExternalFeature', 1);#1 is ignore release flag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
350 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
351
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
352
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
353 close FILE;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
354
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
355 $self->log("Stored $cnt cisRED ExternalFeature motif");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
356 $self->log("Skipped $skipped cisRED ExternalFeature motif imports");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
357 $self->log("Skipped an additional $skipped_xref DBEntry imports");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
358
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
359 #Now store states
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
360 foreach my $status(qw(DISPLAYABLE MART_DISPLAYABLE)){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
361 $set->adaptor->store_status($status, $set);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
362 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
363
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
364
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
365
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
366 # ----------------------------------------
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
367 # Search regions
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
368 # read search_regions.txt from same location as $file
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
369
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
370 #my $search_regions_file = dirname($file) . "/search_regions.txt";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
371 #my $search_file;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
372 #($search_regions_file = $file) =~ s/motifs/searchregions/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
373
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
374 $skipped = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
375 $cnt = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
376 $skipped_xref = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
377 $set = $self->{static_config}{feature_sets}{'cisRED search regions'}{feature_set};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
378
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
379 $self->log_header("Parsing cisRED search regions from $search_file");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
380 open (SEARCH_REGIONS, "<$search_file") || die "Can't open $search_file";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
381 <SEARCH_REGIONS>; # skip header
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
382
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
383 while (<SEARCH_REGIONS>) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
384 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
385 my ($id, $chromosome, $start, $end, $strand, $gene_id) = split;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
386 my $display_id = $self->get_core_display_name_by_stable_id($self->db->dnadb, $gene_id, 'gene');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
387 my $name = "CisRed_Search_$id";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
388
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
389 if(! exists $slice_cache{$chromosome}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
390
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
391 if($old_assembly){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
392 $slice_cache{$chromosome} = $self->slice_adaptor->fetch_by_region('chromosome',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
393 $chromosome,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
394 undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
395 undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
396 undef,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
397 $old_assembly);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
398 }else{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
399 $slice_cache{$chromosome} = $self->slice_adaptor->fetch_by_region('chromosome', $chromosome);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
400 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
401 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
402
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
403 if(! defined $slice_cache{$chromosome}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
404 warn "Can't get slice $chromosome for search region $name\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
405 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
406 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
407
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
408
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
409
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
410
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
411
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
412 my $search_feature = Bio::EnsEMBL::Funcgen::ExternalFeature->new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
413 (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
414 -display_label => $name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
415 -start => $start,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
416 -end => $end,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
417 -strand => $strand,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
418 -feature_set => $set,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
419 -slice => $slice_cache{$chromosome},
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
420 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
421
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
422
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
423
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
424 # project if necessary
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
425 if ($new_assembly) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
426 $search_feature = $self->project_feature($search_feature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
427
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
428 if(! defined $search_feature){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
429 $skipped ++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
430 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
431 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
432 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
433
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
434 $extf_adaptor->store($search_feature);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
435 $cnt++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
436
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
437 #Build Xref here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
438 #TODO: validate gene_id here; the check below only rejects an empty/false value
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
439
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
440 if (! $gene_id) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
441 warn("Can't get internal ID for $gene_id\n");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
442 $skipped_xref++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
443 next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
444 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
445
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
446 my $display_name = $self->get_core_display_name_by_stable_id($self->db->dnadb, $gene_id, 'gene');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
447
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
448 my $dbentry = Bio::EnsEMBL::DBEntry->new
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
449 (
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
450 -dbname => $species.'_core_Gene',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
451 #-release => $self->db->dnadb->dbc->dbname,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
452 -status => 'KNOWNXREF',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
453 #-display_label_linkable => 1,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
454 #-db_display_name => $self->db->dnadb->dbc->dbname,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
455 -db_display_name => 'EnsemblGene',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
456 -type => 'MISC',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
457 -primary_id => $gene_id,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
458 -display_id => $display_name,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
459 -info_type => 'MISC',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
460 -info_text => 'GENE',
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
461 -linkage_annotation => 'cisRED search region gene',#omit?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
462 -analysis => $set->analysis,
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
463 #could have version here if we use the correct dnadb to build the cache
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
464 );
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
465 $dbentry_adaptor->store($dbentry, $search_feature->dbID, 'ExternalFeature', 1);#1 is ignore release flag
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
466 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
467
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
468 close(SEARCH_REGIONS);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
469
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
470
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
471 $self->log("Stored $cnt cisRED search region ExternalFeatures");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
472 $self->log("Skipped $skipped cisRED search region ExternalFeatures");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
473 $self->log("Skipped an additional $skipped_xref cisRED search region DBEntry imports");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
474
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
475 #No MART_DISPLAYABLE here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
476 $set->adaptor->store_status('DISPLAYABLE', $set);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
477
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
478
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
479 #print "$coords_changed features had their co-ordinates changed as a result of assembly mapping.\n" if ($new_assembly);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
480
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
481 return;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
482
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
483 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
484
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
485
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
486
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
487 1;