annotate variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/RunCCAT.pm @ 0:1f6dce3d34e0

Uploaded
author mahtabm
date Thu, 11 Apr 2013 02:01:53 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Bio::EnsEMBL::Funcgen::RunnableDB::RunCCAT
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 'RunCCAT' runs the CCAT "broad peak" caller and stores the resulting peaks as an annotated feature set.
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10 Assumes files are organized and named following a specific convention:
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 ($repository_folder)/experiment_name/cell-type_feature-type/
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 unless specific files are given
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 package Bio::EnsEMBL::Funcgen::RunnableDB::RunCCAT;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 use Bio::EnsEMBL::Funcgen::Utils::Helper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 use Bio::EnsEMBL::DBSQL::DBAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 use Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 use Bio::EnsEMBL::Funcgen::InputSet;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24 use Bio::EnsEMBL::Funcgen::DataSet;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 use Bio::EnsEMBL::Funcgen::FeatureSet;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 use Bio::EnsEMBL::Funcgen::AnnotatedFeature;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw (run_system_cmd);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 use base ('Bio::EnsEMBL::Funcgen::RunnableDB::SWEmbl');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# fetch_input
#
# Prepares everything the CCAT run needs:
#   1. runs the parent class' fetch_input and looks up the feature set by name;
#   2. checks that the input file type is 'sam' or 'bed';
#   3. writes a "<name>\t<size>" chromosome size file derived from the sam
#      header file and records it via _size_file;
#   4. preprocesses the input file into the output directory (skipped when the
#      'reenter' parameter is set) and sets the results file name;
#   5. checks that the control file exists in the output directory;
#   6. for 'sam' input, converts input and control files to bed.
#
# Returns 1. Throws if the feature set is missing, the file type is unsupported,
# a file cannot be opened/closed, a header line cannot be parsed, or the
# input/control file does not exist.
sub fetch_input {
  my $self = shift @_;

  $self->SUPER::fetch_input();

  my $efgdba = $self->_efgdba();
  my $fsa    = $efgdba->get_FeatureSetAdaptor();
  if(!$self->_feature_set($fsa->fetch_by_name($self->_feature_set_name()))){
    throw("Feature Set was not Created");
  }

  my $bin_dir  = $self->_bin_dir();
  my $analysis = $self->_analysis();

  #Use the definitions that are on the database
  # NOTE(review): these three values are not used below. The calls are kept
  # because ->name dies if the accessor returns undef, so removing them could
  # change when a misconfigured job fails - confirm before deleting.
  my $cell_type       = $self->_cell_type()->name;
  my $feature_type    = $self->_feature_type()->name;
  my $experiment_name = $self->_experiment_name();

  my $file_type = $self->_file_type();
  if(($file_type ne 'sam') && ($file_type ne 'bed') ){
    throw("Only sam and bed currently supported for CCAT");
  }

  my $output_dir = $self->_output_dir();
  my $size_file  = $output_dir."/".$self->_set_name.".sizes";

  # Mouse hack: only seq regions whose name contains 'chromosome' are used.
  my $is_mouse = ($self->_species eq 'mus_musculus');

  #Get the size file... similar to the sam header...
  #Consider having it pregenerated...
  my $sam_header = $self->_sam_header;
  if(!defined($sam_header)){ throw("No sam header file is defined"); }

  # Fail loudly here: an unreadable header would otherwise silently produce an
  # empty size file that is then handed to CCAT.
  open(my $header_fh, '<', $sam_header)
    or throw("Could not open sam header $sam_header: $!");
  open(my $sizes_fh, '>', $size_file)
    or throw("Could not open size file $size_file for writing: $!");

  while(my $line = <$header_fh>){
    chomp $line;

    # Expected layout: <name> <size> <anything else>; the trailing whitespace
    # after the size is required by the pattern.
    my ($slice, $slice_size) = $line =~ /^(\S+)\s+(\d+)\s+/;
    if(!$slice || !$slice_size){ throw(" Could not process sam header line $line "); }

    #Mouse Hack!!
    next if($is_mouse && !($slice =~ /chromosome/));
    print {$sizes_fh} $slice."\t".$slice_size."\n";
  }

  # Buffered write errors only surface at close time.
  close($sizes_fh) or throw("Could not close size file $size_file: $!");
  close($header_fh);

  $self->_size_file($size_file);

  my $input_dir  = $self->_input_dir();
  my $file_name  = $self->_input_file();
  my $input_file = $input_dir."/".$file_name;
  if(! -e $input_file){ throw("No valid data file was given: ".$input_file); }

  my $output_file = $output_dir."/".$file_name;
  if(! $self->param('reenter')){
    #TODO Validate if existent file is ok.
    $self->_preprocess_file($input_file, $output_file, $file_type) ||
      throw("Error processing data file $input_file");
  } else {
    if(! -e $output_file){ warn "$output_file does not exist. May need to rerun from scratch."; }
  }
  $self->_input_file($output_file);

  # An explicit 'results_file' parameter wins; otherwise derive the name from
  # the preprocessed file and the analysis logic name.
  if(!$self->_results_file($self->param('results_file'))){
    $self->_results_file($output_file.".".$analysis->logic_name);
  }

  #Always require a control file...
  my $control_file = $output_dir."/".$self->_control_file();
  if(! -e $control_file){ throw("No valid control file was given: ".$control_file); }
  $self->_control_file($control_file);

  #May need to convert it...
  if($file_type eq 'sam'){

    #Mouse hack: keep only reads on 'chromosome' seq regions
    my $region_filter = $is_mouse ? " | grep 'chromosome'" : "";

    # Converts one sam file to bed (unless re-entering) and returns the bed
    # file name. Shared by the input and the control file.
    my $sam_to_bed = sub {
      my ($sam_file) = @_;

      my $bed_file = $sam_file;
      $bed_file =~ s/\.sam/\.bed/;

      if(! $self->param('reenter')){
        # Without a '.sam' in the name the redirect below would truncate the
        # very file samtools is about to read.
        if($bed_file eq $sam_file){
          throw("Cannot derive a bed file name from $sam_file (no '.sam' in the name)");
        }
        run_system_cmd($bin_dir."/samtools view -Su ".$sam_file." | ${bin_dir}/bamToBed".$region_filter." >${bed_file}");
      }

      return $bed_file;
    };

    $self->_input_file($sam_to_bed->($self->_input_file));
    $self->_control_file($sam_to_bed->($self->_control_file));
  }

  return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# run
#
# Assembles the CCAT command line and executes it through run_system_cmd.
# Nothing is executed when the 'reenter' parameter is set. Returns 1.
#
# Command layout:
#   <CCAT path>/bin/CCAT <ChIP library read file name> <control library read file name>
#     <chromosome length file name> <config file name> <project name>
sub run {
  my ($self) = @_;

  return 1 if($self->param('reenter'));

  my $analysis = $self->_analysis;
  my $bin_dir  = $self->_bin_dir();

  # Each piece is fetched into its own scalar so that every accessor is
  # called in scalar context, in the order the command line needs them.
  my $program      = $bin_dir."/".$analysis->program_file;
  my $chip_reads   = $self->_input_file();
  my $ctrl_reads   = $self->_control_file();
  my $chrom_sizes  = $self->_size_file();
  my $config_file  = $bin_dir."/ccat_config/".$analysis->parameters;
  my $project_name = $self->_results_file;

  my $command = join(" ", $program, $chip_reads, $ctrl_reads,
                     $chrom_sizes, $config_file, $project_name);

  warn "Running analysis:\t$command";
  run_system_cmd($command);

  return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# write_output
#
# Stores the results of the run by delegating to _parse_result_file.
# Always returns 1.
sub write_output {
  my ($self) = @_;

  $self->_parse_result_file();

  return 1;
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# _parse_result_file
#
# Reads "<results_file>.significant.region", turns every line with an FDR of
# at most 0.05 into an AnnotatedFeature on the job's feature set, stores the
# features in one batch and finally calls set_imported_states_by_Set on the
# feature set.
#
# Lines are skipped (with a warning, except for blank lines) when they are
# malformed, lie outside the 'slice' parameter, repeat an already seen
# region/start pair, have coordinates outside their slice, or when the feature
# object cannot be created.
#
# Throws if the results file cannot be opened or a seq region cannot be
# fetched. Returns whatever set_imported_states_by_Set returns.
sub _parse_result_file {

  my $self = shift @_;

  ### annotated features
  my $fset = $self->_feature_set();

  my $efgdba = $self->_efgdba();
  my $sa     = $efgdba->get_SliceAdaptor();

  #Cache slices and features...
  my %slice;
  my @af;

  my %cache_af;

  #Hardcode a minimum fdr... may pass as parameter
  my $max_fdr = 0.05;

  # A missing results file means CCAT did not produce output; that must not be
  # reported as "no significant features".
  my $region_file = $self->_results_file().".significant.region";
  open(my $region_fh, '<', $region_file)
    or throw("Could not open CCAT results file $region_file: $!");

  LINE: while(my $line = <$region_fh>){
    chomp $line;
    next LINE if($line =~ /^\s*$/);

    #Content of CCAT output file
    my ($seqid,$summit,$start,$end,$chipreads,$ctrlreads,$fold,$fdr) = split(/\t/, $line);
    if(!defined($fdr)){
      warn "Ignoring malformed CCAT result line: ".$line."\n";
      next LINE;
    }

    next LINE if($fdr > $max_fdr);
    my $score = $fold;

    #This seqid may vary depending on the input given to SWEmbl...
    # handle it manually at least for the moment... namely the sam seqid...
    #Make sure to test thoroughly to see if it works...
    #e.g. chromosome:GRCh37:15:1:102531392:1
    if($seqid =~ /^\S*:\S*:(\S+):\S+:\S+:\S/) { $seqid = $1; }
    #In case UCSC input is used...
    $seqid =~ s/^chr//i;

    if($self->param('slice') && ($seqid ne $self->param('slice'))){
      warn "Feature being ignored as it is not in specified slice ".$self->param('slice')." : Region:".
        $seqid." Start:".$start." End:".$end." Score:".$score." Summit:".$summit."\n";
      next LINE;
    }

    #May have some sort of repeats(?). Since it is ordered with significance, ignore remaining hits.
    next LINE if(defined($cache_af{$seqid."_".$start}));
    $cache_af{$seqid."_".$start} = 1;

    #next if ($seqid =~ m/^M/);

    # filtering is done as a post-processing e.g. FilterBlacklist.pm
    #$summit = int($summit);#Round up?

    unless (exists $slice{$seqid}) {
      $slice{$seqid} = $sa->fetch_by_region(undef, $seqid);
    }

    # Give a readable error instead of dying on a method call on undef below.
    if(!defined($slice{$seqid})){
      throw("Could not fetch slice for seq region ".$seqid." found in ".$region_file);
    }

    if( ($start < 1) || ($end > $slice{$seqid}->end)){
      warn "Feature being ignored due to coordinates out of slice: Region:".
        $seqid." Start:".$start." End:".$end." Score:".$score." Summit:".$summit."\n";
      # Actually ignore it, as the warning says.
      next LINE;
    }

    #Gracefully handle errors...
    my $af = eval{
      Bio::EnsEMBL::Funcgen::AnnotatedFeature->new
        (
         -slice       => $slice{$seqid},
         -start       => $start,
         -end         => $end,
         -strand      => 0,
         -score       => $score,
         -summit      => $summit,
         -feature_set => $fset,
        );
    };
    if($@) { warn($@); next LINE; }
    if(!$af) {
      warn("Could not create feature - Region:".
           $seqid." Start:".$start." End:".$end." Score:".$score.
           " Summit:".$summit);
      next LINE;
    }

    push(@af, $af);
  }
  close($region_fh);

  # Batch store features...
  if(@af){
    $efgdba->get_AnnotatedFeatureAdaptor->store(@af);
  } else {
    warn "No significant features detected!";
  }

  #Do this on a wrapup runnable...so it will only be visible after reads are loaded...
  # Status should not be set at this stage
  return $fset->adaptor->set_imported_states_by_Set($fset);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
267
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
268
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Private accessor for the feature set: forwards the optional new value to
# _getter_setter under the key 'feature_set' and returns its result.
sub _feature_set {
  my ($self, $feature_set) = @_;

  return $self->_getter_setter('feature_set', $feature_set);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
273
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Private accessor for the results file: forwards the optional new value to
# _getter_setter under the key 'results_file' and returns its result.
sub _results_file {
  my ($self, $results_file) = @_;

  return $self->_getter_setter('results_file', $results_file);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
278
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
# Private accessor for the size file: forwards the optional new value to
# _getter_setter under the key 'size_file' and returns its result.
sub _size_file {
  my ($self, $size_file) = @_;

  return $self->_getter_setter('size_file', $size_file);
}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
283
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
284
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
285 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
286