0
|
1 =head1 LICENSE
|
|
2
|
|
3 Copyright (c) 1999-2012 The European Bioinformatics Institute and
|
|
4 Genome Research Limited. All rights reserved.
|
|
5
|
|
6 This software is distributed under a modified Apache license.
|
|
7 For license details, please see
|
|
8
|
|
9 http://www.ensembl.org/info/about/code_licence.html
|
|
10
|
|
11 =head1 CONTACT
|
|
12
|
|
13 Please email comments or questions to the public Ensembl
|
|
14 developers list at <ensembl-dev@ebi.ac.uk>.
|
|
15
|
|
16 Questions may also be sent to the Ensembl help desk at
|
|
17 <helpdesk@ensembl.org>.
|
|
18
|
|
19 =cut
|
|
20
|
|
21 package Bio::EnsEMBL::Funcgen::Parsers::miranda;
|
|
22
|
|
23 use strict;
|
|
24
|
|
25 use Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser;
|
|
26 use Bio::EnsEMBL::DBEntry;
|
|
27 use Bio::EnsEMBL::Funcgen::ExternalFeature;
|
|
28
|
|
29 use vars qw(@ISA);
|
|
30 @ISA = qw(Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser);
|
|
31
|
|
32
|
|
33
|
|
34 sub new {
|
|
35 my $caller = shift;
|
|
36 my $class = ref($caller) || $caller;
|
|
37
|
|
38 my $self = $class->SUPER::new(@_, type => 'miRanda');
|
|
39
|
|
40 #Set default feature_type and feature_set config
|
|
41 $self->{static_config}{feature_types} =
|
|
42 {
|
|
43 'miRanda Target' => {
|
|
44 -name => 'miRanda Target',
|
|
45 -class => 'RNA',
|
|
46 -description => 'miRanda microRNA target',
|
|
47 },
|
|
48 };
|
|
49
|
|
50 $self->{static_config}{analyses} =
|
|
51 {
|
|
52 miRanda => {
|
|
53 -logic_name => 'miRanda',
|
|
54 -description => '<a href="http://www.ebi.ac.uk/enright-srv/microcosm/htdocs/targets/v5/consset.html">miRanda microRNA target predictions</a>',
|
|
55 -display_label => 'miRanda Targets',
|
|
56 -displayable => 1,
|
|
57 },
|
|
58 };
|
|
59
|
|
60 $self->{static_config}{feature_sets}{'miRanda miRNA targets'} =
|
|
61 {
|
|
62 #analyses => $self->{static_config}{analyses},
|
|
63 #feature_types => $self->{static_config}{feature_types},
|
|
64 feature_set =>
|
|
65 {
|
|
66 -feature_type => 'miRanda Target',
|
|
67 -display_name => 'miRanda Targets',
|
|
68 -description => $self->{static_config}{analyses}{miRanda}{-description},
|
|
69 -analysis => 'miRanda',
|
|
70 },
|
|
71 xrefs => 1,
|
|
72 };
|
|
73
|
|
74
|
|
75
|
|
76
|
|
77 #$self->validate_and_store_feature_types;
|
|
78 $self->validate_and_store_config([keys %{$self->{static_config}{feature_sets}}]);
|
|
79 $self->set_feature_sets;
|
|
80
|
|
81 return $self;
|
|
82 }
|
|
83
|
|
84
|
|
85 #TO DO
|
|
86 # In loop logging should only be enable with verbose or change to debug?
|
|
87 # Use count methods?
|
|
88 # Sort input to enable cache clearing(caches max out at ~200MB so not essential)
|
|
89 # Optimise slice cache testing(load only takes 8 min sso not essential)
|
|
90 # Add verbose logging/debug to show reassigned xrefs?
|
|
91
|
|
92 sub parse_and_load{
|
|
93 my ($self, $files, $old_assembly, $new_assembly) = @_;
|
|
94
|
|
95 #Add num files to config and check this in BaseImporter(generically)
|
|
96 if(scalar(@$files) != 1){
|
|
97 throw('You must currently define a single file to load miRanda features from:\t'.join(' ', @$files));
|
|
98 }
|
|
99
|
|
100 my $file = $files->[0];
|
|
101 $self->log_header("Parsing miRanda data from:\t$file");
|
|
102
|
|
103 my $analysis_adaptor = $self->db->get_AnalysisAdaptor();
|
|
104 my $ftype_adaptor = $self->db->get_FeatureTypeAdaptor();
|
|
105 my $extf_adaptor = $self->db->get_ExternalFeatureAdaptor;
|
|
106 my $trans_adaptor = $self->db->dnadb->get_TranscriptAdaptor;
|
|
107 my $dbentry_adaptor = $self->db->get_DBEntryAdaptor;
|
|
108 my $set = $self->{static_config}{feature_sets}{'miRanda miRNA targets'}{feature_set};
|
|
109
|
|
110 my ($dbentry, $schema_build, %features_by_name, %slice_cache, $ens_display_name, %feature_cache);
|
|
111 my (@txs, @sid_dnames, %xref_cache, %seen_mifeat_trans, %skipped_features, %failed_reassignment);
|
|
112 my ($overlaps_3_utr);
|
|
113 # this object is only used for projection
|
|
114 my $dummy_analysis = new Bio::EnsEMBL::Analysis(-logic_name => 'miRandaProjection');
|
|
115
|
|
116 #Some hairy counting to make sure the XREF reassignment
|
|
117 #is doing something sensible
|
|
118 #Can probably change all this to use the count methods
|
|
119 my $slice_skipped = 0;
|
|
120 my $old_mid_skipped = 0;
|
|
121 my $cnt = 0;
|
|
122 my $proj_skipped = 0;
|
|
123 my $seq_skipped = 0;
|
|
124 my $row_cnt = 0;
|
|
125 my $new_xrefs = 0;
|
|
126 my $old_as_new_xrefs = 0;
|
|
127 my $failed_new_sids = 0;
|
|
128 my $utr_failed_old_sids = 0;
|
|
129 my $no_proj_failed_old_sids = 0;
|
|
130 my $previously_skipped = 0;
|
|
131 my $with_overlapping_tx = 0;
|
|
132 my $retired_sids = 0;
|
|
133 my $existing_sids = 0;
|
|
134 my $total_xrefs = 0;
|
|
135 my $species = $self->db->species;
|
|
136 my $analysis = $set->analysis;
|
|
137
|
|
138
|
|
139 #Need to allow this to be passed
|
|
140 $schema_build ||= $self->db->_get_schema_build($self->db->dnadb);
|
|
141
|
|
142 if(! $species){
|
|
143 throw('Must define a species to define the external_db');
|
|
144 }
|
|
145 #Just to make sure we hav homo_sapiens and not Homo Sapiens
|
|
146 ($species = lc($species)) =~ s/ /_/;
|
|
147
|
|
148 my $edb_name = $species.'_core_Transcript',
|
|
149
|
|
150
|
|
151
|
|
152 open (FILE, "<$file") || die "Can't open $file";
|
|
153
|
|
154 #We used to have redundant target features wrt xrefs
|
|
155 #Similarity mmu-miR-192 miRanda miRNA_target 5 127885168 127885188 - . 15.2253 4.048320e-03 ENSMUST00000031367 Slc15a4
|
|
156 #Similarity mmu-miR-192 miRanda miRNA_target 5 127885168 127885188 - . 15.2397 3.945600e-03 ENSMUST00000075376 Slc15a4
|
|
157
|
|
158 #Have now changed this to nr features with multiple xrefs
|
|
159 #Hence have had to remove transcript sid from diplay_label
|
|
160
|
|
161 #Several 'seen' caches have be implemented to prevent redundant storing
|
|
162 #Currently these span the whole data set, but could be cleaned after every feature
|
|
163 #if the input is sorted correctly
|
|
164
|
|
165 #Need to make sure we are counting correctly
|
|
166 #skipped counts will reflect lines in input, not nr features
|
|
167
|
|
168
|
|
169 #Could add UnmappedObjects in here
|
|
170
|
|
171 LINE: while (<FILE>) {
|
|
172 next LINE if ($_ =~ /^\s*\#/o || $_ =~ /^\s*$/o);
|
|
173 $row_cnt ++;
|
|
174 #Added next for old miRbase IDs.
|
|
175
|
|
176 #Sanger
|
|
177 ##GROUP SEQ METHOD FEATURE CHR START END STRAND PHASE SCORE PVALUE_OG TRANSCRIPT_ID EXTERNAL_NAME
|
|
178 #Similarity mmu-miR-707 miRanda miRNA_target 2 120824620 120824640 + . 15.3548 2.796540e-02 ENST00000295228 INHBB
|
|
179
|
|
180
|
|
181 my ($group, $id, $method, $feature, $chr, $start, $end, $strand, undef, undef, undef, $ens_id, $display_name) = split;
|
|
182 #We never use $diplay_name now, as it never match the transcript display name
|
|
183 #which as the transcript number suffic attached to the gene display name
|
|
184 #e.g. BRCA2-001
|
|
185
|
|
186 #%seen_mifeat_trans handles redundancy between existant(i.e. not retired) sids in input
|
|
187 #file and those identified when trying to reannotated a retired sid
|
|
188
|
|
189 #ALREADY ANNOTATED OR SKIPPED
|
|
190 if(exists $seen_mifeat_trans{$id.':'.$chr.':'.$start.':'.$end}{$ens_id} ){
|
|
191 $self->log("Skipping previously reannotated miRNA target:\t".$id.':'.$chr.':'.$start.':'.$end.' - '.$ens_id);
|
|
192 $old_as_new_xrefs ++;
|
|
193 }
|
|
194 elsif(exists $skipped_features{$id.':'.$chr.':'.$start.':'.$end}){
|
|
195 $self->log("Skipping previous failed feature:\t".$id.':'.$chr.':'.$start.':'.$end."\t".
|
|
196 $skipped_features{$id.':'.$chr.':'.$start.':'.$end});
|
|
197 $previously_skipped++;
|
|
198 #Could increment count in here to count old xrefs failed for each fail type
|
|
199 next;
|
|
200 }
|
|
201
|
|
202
|
|
203 #Added next for old miRbase IDs.
|
|
204 if ( $id =~ /\*$/o ){
|
|
205 $self->log("Skipping old miRbase ID:\t$id");
|
|
206
|
|
207 $skipped_features{$id.':'.$chr.':'.$start.':'.$end} = 'Old invalid miRbase ID';
|
|
208 $old_mid_skipped ++;
|
|
209
|
|
210 #$old_xrefs_skipped ++;
|
|
211 #Just want to make sure we have comparable
|
|
212 #old skipped xrefs and re-annotated xrefs
|
|
213 #so this is not useful here
|
|
214
|
|
215
|
|
216 next LINE;
|
|
217 }
|
|
218
|
|
219 $strand = ($strand =~ /\+/o) ? 1 : -1;
|
|
220
|
|
221 #Now moved transript xref info exclusively to xrefs
|
|
222 ##my $id = $ens_id =~ s/[\"\']//g; # strip quotes
|
|
223 #my $id = $ens_id.':'.$seq;
|
|
224
|
|
225
|
|
226 #change this to only test once
|
|
227 #if exists and not defined then skip
|
|
228
|
|
229 if(! defined $slice_cache{$chr}){
|
|
230
|
|
231 #Was originally limiting to chromosome
|
|
232
|
|
233 if($old_assembly){
|
|
234 $slice_cache{$chr} = $self->slice_adaptor->fetch_by_region(undef,
|
|
235 $chr,
|
|
236 undef,
|
|
237 undef,
|
|
238 undef,
|
|
239 $old_assembly);
|
|
240 }else{
|
|
241 $slice_cache{$chr} = $self->slice_adaptor->fetch_by_region(undef, $chr);
|
|
242 }
|
|
243
|
|
244 if(! defined $slice_cache{$chr}){
|
|
245 warn "Can't get slice $chr for sequence $id\n";
|
|
246
|
|
247 $slice_skipped ++;
|
|
248 $skipped_features{$id.':'.$chr.':'.$start.':'.$end} = "Failed to fetch slice $chr";
|
|
249
|
|
250 #Add UnmappedObject here?
|
|
251 next LINE;
|
|
252 }
|
|
253 }
|
|
254
|
|
255
|
|
256 #We can add coding xref to feature type based on the miRbase name
|
|
257 #.e.g hsa-mir-24-1
|
|
258 #However, this isn't stored as an xref
|
|
259 #It is stored in the gene.description
|
|
260 #e.g. hsa-mir-24-1 [Source:miRBase;Acc:MI0000080]
|
|
261 #Not easy to fetch as descriptions not indexed!
|
|
262 #
|
|
263
|
|
264 #Cache/store FeatureType
|
|
265
|
|
266 if(! exists $features_by_name{$id}){
|
|
267 $features_by_name{$id} = $ftype_adaptor->fetch_by_name($id);
|
|
268
|
|
269 if(! defined $features_by_name{$id}){
|
|
270 ($features_by_name{$id}) = @{$ftype_adaptor->store(Bio::EnsEMBL::Funcgen::FeatureType->new
|
|
271 (
|
|
272 -name => $id,
|
|
273 -class => 'RNA',
|
|
274 -description => $method.' '.$feature,
|
|
275 ))};
|
|
276
|
|
277 #Need to add source gene xref here to enable target conequences implied by source variation
|
|
278
|
|
279 }
|
|
280 }
|
|
281
|
|
282
|
|
283 #Make sure we have the target transcript before we store the feature
|
|
284 #Have to do this as we can't always run with the correct core DB
|
|
285 #as it may be too old. Hence we have to hard code the edb.release
|
|
286
|
|
287
|
|
288 ##This should enever happen, as the search regions are defined by ens transcript
|
|
289 ##i.e there is always a ensembl iD
|
|
290
|
|
291 if (! $ens_id) {
|
|
292 warn("No xref available for miRNA $id\n");
|
|
293 $skipped_xref++;
|
|
294 next;
|
|
295 }
|
|
296
|
|
297
|
|
298
|
|
299 if(exists $feature_cache{$id.':'.$chr.':'.$start.':'.$end}){
|
|
300 $feature = $feature_cache{$id.':'.$chr.':'.$start.':'.$end};
|
|
301 }
|
|
302 else{
|
|
303
|
|
304 $feature = Bio::EnsEMBL::Funcgen::ExternalFeature->new
|
|
305 (
|
|
306 -display_label => $id,
|
|
307 -start => $start,
|
|
308 -end => $end,
|
|
309 -strand => $strand,
|
|
310 -feature_type => $features_by_name{$id},
|
|
311 -feature_set => $set,
|
|
312 -slice => $slice_cache{$chr},
|
|
313 );
|
|
314
|
|
315 # project if necessary
|
|
316 if ($new_assembly) {
|
|
317
|
|
318 $feature = $self->project_feature($feature, $new_assembly);
|
|
319
|
|
320 if (! defined $feature) {
|
|
321 $proj_skipped ++;
|
|
322 $skipped_features{$id.':'.$chr.':'.$start.':'.$end} = 'Failed projection';
|
|
323 next;
|
|
324 }
|
|
325
|
|
326 #This was failing as old assembly seqs are not stored, hence are returned as Ns
|
|
327 #if ($old_seq ne $feature->seq){
|
|
328 # $skipped_features{$id.':'.$chr.':'.$start.':'.$end} = 'Projected sequence mismatch';
|
|
329 # $seq_skipped++;
|
|
330 # next;
|
|
331 #}
|
|
332
|
|
333 #Assembly mapping tends to ignore single seq mismatches, but does handle gaps.
|
|
334 #This is not generally a problemas a new assembly is normally a reshuffle of existing
|
|
335 #clones/contigs which will have exactly the same seq(actually there is a small amount of change)
|
|
336 #but this only affected a handful of transcripts
|
|
337 #old super(contigs) which have be integrated into a chromosome are not mapped
|
|
338 #so we will lose these
|
|
339 #the rest (retired clones/contigs and new clones/contigs) are aligned
|
|
340 #but again mismatches tend to be ignored.
|
|
341
|
|
342 #There may be a tendency for this later case to contain more mismatches due to an entirely new
|
|
343 #clone seqeunce, hence let's filter/count these?
|
|
344
|
|
345 #Tested and import using the following hack using old 67 DB to test seq mismatches
|
|
346 #only 1 failed.
|
|
347 #my $old_slice = $v67_sa->fetch_by_region( undef, $chr, $start, $end, $strand);
|
|
348
|
|
349 #if ($old_slice->seq ne $feature->seq){
|
|
350 # $skipped_features{$id.':'.$chr.':'.$start.':'.$end} = 'Projected sequence mismatch';
|
|
351 # $seq_skipped++;
|
|
352 # next LINE;
|
|
353 #}
|
|
354 }
|
|
355 }
|
|
356
|
|
357
|
|
358
|
|
359 ### DEFINE TRANSCRIPT XREF INFO
|
|
360
|
|
361 #Check transcript exists
|
|
362 $display_name = $self->get_core_display_name_by_stable_id($self->db->dnadb, $ens_id, 'transcript');
|
|
363
|
|
364
|
|
365 # ATTEMPT TO REANNOTATE
|
|
366 if (! defined $display_name){ # Transcript does not exist in current release
|
|
367
|
|
368 #Set to 0 as we are not storing this sid
|
|
369 $seen_mifeat_trans{$id.':'.$chr.':'.$start.':'.$end}{$ens_id} = 0;
|
|
370
|
|
371 $self->log("$id $ens_id stable ID has been retired");
|
|
372 $retired_sids ++;
|
|
373
|
|
374 #Try and re-annotate on newer overlapping transcripts
|
|
375 @txs = @{$trans_adaptor->fetch_all_by_Slice($feature->feature_Slice)};
|
|
376 @sid_dnames = ();
|
|
377
|
|
378 #COUNT unique miRNA features which have failed reannotated
|
|
379 if(! exists $failed_reassignment{$id.':'.$chr.':'.$start.':'.$end}){
|
|
380 $failed_reassignment{$id.':'.$chr.':'.$start.':'.$end} = 1;
|
|
381 }
|
|
382
|
|
383
|
|
384
|
|
385 if (@txs) { #OVERLAPPING TRANSCRIPTS
|
|
386 $with_overlapping_tx ++;
|
|
387
|
|
388 foreach my $tx(@txs){
|
|
389
|
|
390 #Check we have previously seen this xref
|
|
391 if(! exists $seen_mifeat_trans{$id.':'.$chr.':'.$start.':'.$end}{$tx->stable_id}){
|
|
392
|
|
393 #Do UTR checking here
|
|
394 $overlaps_3_utr = 0;
|
|
395
|
|
396 #Transcript will always return wrt to feature_Slice of miRNA feature.
|
|
397 #As miRNA are complimentary to the the mRNA, which is complimentary
|
|
398 #to the sense strand of the gene, these should always be on the same strand.
|
|
399
|
|
400 #Could add some more detailed strand/UTR fail counts in here
|
|
401 #but leave for now
|
|
402 #$same_strand = 1;
|
|
403
|
|
404 if($tx->seq_region_strand != $strand){
|
|
405 #$same_strand = 0;
|
|
406 }
|
|
407 elsif($strand == 1){
|
|
408
|
|
409 if( ($end <= $tx->seq_region_end) &&
|
|
410 ($start > $tx->coding_region_end) ){
|
|
411 $overlaps_3_utr = 1;
|
|
412 }
|
|
413
|
|
414 }
|
|
415 else{#Must be -1
|
|
416
|
|
417 if( ($end < $tx->coding_region_start) &&
|
|
418 ($start >= $tx->seq_region_start) ){
|
|
419 $overlaps_3_utr = 1;
|
|
420 }
|
|
421 }
|
|
422
|
|
423 #could count no utr match here if we set same_strand boolean
|
|
424
|
|
425 if($overlaps_3_utr){
|
|
426 $display_name = $self->get_core_display_name_by_stable_id($self->db->dnadb,
|
|
427 $tx->stable_id,
|
|
428 'transcript');
|
|
429 push @sid_dnames, [$tx->stable_id, $display_name];
|
|
430 $seen_mifeat_trans{$id.':'.$chr.':'.$start.':'.$end}{$tx->stable_id} = 1;
|
|
431 #want to count re-annotated xrefs here
|
|
432 #These may have been represented in the original file
|
|
433 $new_xrefs ++;
|
|
434 }
|
|
435 else{
|
|
436 # FAILED annotate new sid
|
|
437 # This currently includes +ve and -ve strand transcripts
|
|
438 $failed_new_sids ++;
|
|
439 }
|
|
440 }
|
|
441
|
|
442 if(! @sid_dnames){
|
|
443 #FAILED TO REANNOTATE retired sid xref
|
|
444 $utr_failed_old_sids ++;
|
|
445
|
|
446 next LINE;
|
|
447 }
|
|
448 }
|
|
449 }
|
|
450 else{ #FAILED TO REANNOTATE XREF - NO OVERLAPPING TRANSCRIPTS
|
|
451 $no_proj_failed_old_sids ++;
|
|
452 next LINE;
|
|
453 }
|
|
454
|
|
455
|
|
456 #COUNT unique miRNA features which have failed reannotated
|
|
457 if(@sid_dnames){
|
|
458 $failed_reassignment{$id.':'.$chr.':'.$start.':'.$end} = 0
|
|
459 }
|
|
460
|
|
461 }
|
|
462 else { #Add xref for existing transcript
|
|
463 $seen_mifeat_trans{$id.':'.$chr.':'.$start.':'.$end} = {$ens_id => 1};
|
|
464 #$display_name = $ens_display_name;
|
|
465 @sid_dnames = ([$ens_id, $display_name]);
|
|
466 $existing_sids++;
|
|
467 }
|
|
468
|
|
469
|
|
470 #shouldn't need this if as we call next for everycase above
|
|
471 #if (@xref_details) {
|
|
472 #Only if we have target transcripts
|
|
473
|
|
474 # STORE FEATURE
|
|
475 if (! exists $feature_cache{$id.':'.$chr.':'.$start.':'.$end}){
|
|
476 ($feature) = @{$extf_adaptor->store($feature)};
|
|
477 $feature_cache{$id.':'.$chr.':'.$start.':'.$end} = $feature;
|
|
478 $cnt++;
|
|
479 }
|
|
480
|
|
481
|
|
482 # STORE XREFS
|
|
483 foreach my $xref_info(@sid_dnames) {
|
|
484
|
|
485 #Handle release/version in xref version as stable_id version?
|
|
486
|
|
487 $dbentry = Bio::EnsEMBL::DBEntry->new
|
|
488 (
|
|
489 -dbname => $edb_name,
|
|
490 -release => $schema_build,
|
|
491 #-release => '58_37k',#'46_36h', #Hard coded due to schema to old to use with API
|
|
492 -status => 'KNOWNXREF',
|
|
493 #-display_label_linkable => 1,
|
|
494 -db_display_name => 'EnsemblTranscript',
|
|
495 -type => 'MISC',
|
|
496 -primary_id => $xref_info->[0],
|
|
497 -display_id => $xref_info->[1],
|
|
498 -info_type => 'MISC',
|
|
499 -info_text => 'TRANSCRIPT',
|
|
500 -linkage_annotation => 'miRanda target - negative influence',
|
|
501 #could have version here if we use the correct dnadb to build the cache
|
|
502 -analysis => $analysis,
|
|
503 );
|
|
504
|
|
505 $dbentry_adaptor->store($dbentry, $feature->dbID, 'ExternalFeature', 1); #1 is ignore release flag
|
|
506 $total_xrefs++;
|
|
507 }
|
|
508 }
|
|
509
|
|
510 close FILE;
|
|
511
|
|
512 #scalar context returns count
|
|
513 my $miRNA_failed_reassignment = grep/1/, values %failed_reassignment;
|
|
514 my $miRNA_skipped = $old_mid_skipped + $slice_skipped +
|
|
515 $proj_skipped + $seq_skipped + $miRNA_failed_reassignment;
|
|
516
|
|
517 $self->log_header($set->name." Import Report");
|
|
518 $self->log(sprintf("%-090s", "miRNA feature:target pairs seen(i.e. input rows):").$row_cnt);
|
|
519 $self->log(sprintf("%-090s","Stored NR miRanda miRNA ExternalFeatures:").$cnt);
|
|
520 $self->log(sprintf("%-090s","Total skipped miRanda miRNA target features(inc reassigned):").$miRNA_skipped);
|
|
521 $self->log(sprintf("%-090s","Old miRbase IDs skipped").$old_mid_skipped);
|
|
522 $self->log(sprintf("%-090s","Skipped features on unknown slice:").$slice_skipped."\n");
|
|
523
|
|
524 if($new_assembly){
|
|
525 $self->log(sprintf("%-090s","Skipped due to failed assembly projection:").$proj_skipped);
|
|
526 $self->log(sprintf("%-090s","Skipped due to seq mismatch for assembly projection:")."$seq_skipped\n");
|
|
527 }
|
|
528
|
|
529 $self->log("The following numbers are counted from the mappable/valid miRNA target features");
|
|
530 $self->log(sprintf("%-090s", "Total stored Transcript xrefs(current and retired re-assigned):"). $total_xrefs);
|
|
531 $self->log(sprintf("%-090s", "Total current Transcript xrefs:"). $existing_sids);
|
|
532 $self->log(sprintf("%-090s", "Total skipped miRanda miRNA target xrefs:").($previously_skipped + $miRNA_skipped));
|
|
533 $self->log(sprintf("%-090s", "Retired Transcript xrefs:").$retired_sids);
|
|
534 $self->log(sprintf("%-090s", "Unique miRNA features which completely failed reassignment:").$miRNA_failed_reassignment);
|
|
535 $self->log(sprintf("%-090s", "Total new Xrefs assigned due to retired Transcript:").$new_xrefs);
|
|
536 $self->log(sprintf("%-090s", "Previously assigned Transcript Xrefs skipped:").$old_as_new_xrefs);
|
|
537 $self->log(sprintf("%-090s", "Retired Transcript Xrefs with no new overlapping Transcript:").$no_proj_failed_old_sids);
|
|
538 $self->log(sprintf("%-090s", "Retired Transcript Xrefs with new overlapping Transcript(s):").$with_overlapping_tx);
|
|
539 $self->log(sprintf("%-090s", "Retired Transcript Xrefs with new overlapping Transcript(s), all fail 3'UTR/strand test:").
|
|
540 $utr_failed_old_sids);
|
|
541 #This figure may include transcripts which would have failed in the original set!
|
|
542 $self->log(sprintf("%-090s","Total new Transcript xrefs considered which fail 3' UTR/strand test:"). $failed_new_sids);
|
|
543 $self->log(sprintf("%-090s","True new Xref assignments due to retired Transcripts:").($new_xrefs - $old_as_new_xrefs));
|
|
544
|
|
545 #We also want unique miRNA feature which we re-assigned
|
|
546 #Hard to calculate as the re-assignment might be to a current transcript in the input
|
|
547
|
|
548 #Now set states
|
|
549 foreach my $status(qw(DISPLAYABLE MART_DISPLAYABLE)){
|
|
550 $set->adaptor->store_status($status, $set);
|
|
551 }
|
|
552
|
|
553 return;
|
|
554 }
|
|
555
|
|
556 1;
|