annotate variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/MakeDnaseProfile.pm @ 3:d30fa12e4cc5 default tip

Merge heads 2:a5976b2dce6f and 1:09613ce8151e which were created as a result of a recently fixed bug.
author devteam <devteam@galaxyproject.org>
date Mon, 13 Jan 2014 10:38:30 -0500
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Bio::EnsEMBL::Funcgen::RunnableDB::MakeDnaseProfile
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12 package Bio::EnsEMBL::Funcgen::RunnableDB::MakeDnaseProfile;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 use Bio::EnsEMBL::Hive::DBSQL::AnalysisDataAdaptor;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 use base ('Bio::EnsEMBL::Hive::ProcessWithParams');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20 use Data::Dumper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 sub fetch_input { # nothing to fetch... just the parameters...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 my $self = shift @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 $self->SUPER::fetch_input();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27 my $work_dir = $self->param('work_dir');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 my $matrix = $self->param('matrix');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29 my $dnase = $self->param('dnase');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 #throw "Dnase file $dnase does not exist in $work_dir" if(! -e $work_dir."/".$dnase);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 throw "Matrix file $matrix does not exist in $work_dir" if(! -e $work_dir."/matches/".$matrix);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32 my $is_male = $self->param('is_male');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 throw "Need to define is_male" if(!defined($is_male));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 #throw "mapability.bedGraph file expected in $work_dir" if(! -e $work_dir."/mapability.bedGraph");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 sub run { # Create Groups and Analysis and stuff...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 my $self = shift @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 my $hlen = 100;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 my $work_dir = $self->param('work_dir');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44 my $matrix = $self->param('matrix');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 my $dnase = $self->param('dnase');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 my $is_male = $self->param('is_male');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 my $motif_size;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49 my %motifs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 #Or remove // from matrix name
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 open(FILE,$work_dir."/matches_vf/".$matrix);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 <FILE>; #title
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 while(<FILE>){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 #10 71532 71542 Gata1 8.55866 1 MA0035.2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 my ($chr,$start,$end,$name,$score,$strand)=split("\t");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 next if(!$is_male && ($chr =~ /Y/));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 #Ignore Mitochondria...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 next if($chr =~ /MT/);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 if(!defined($motif_size)){ $motif_size = $end - $start; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 $motifs{$chr}{$start}{$strand}=$score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 close FILE;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 #Maybe pass this as parameters
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 my $out_file = $work_dir."/output/".$matrix."_".$dnase.".counts";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 open(FO,">".$out_file);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 foreach my $chr (sort keys %motifs){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 my %datap = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 my %datan = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 foreach my $start (sort keys %{$motifs{$chr}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 for(my $i=$start-$hlen;$i<$start+$hlen+$motif_size;$i++){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 $datap{$i} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 $datan{$i} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 #Add a mappability score to extra filter...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 #A lot more time and didn't see much evidence of improvement...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 #my %mappability = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 #foreach my $start (sort keys %{$motifs{$chr}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 # for(my $i=$start-$hlen;$i<$start+$hlen+$motif_size;$i++){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 # $mappability{$i} = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87 #open(FILE,$work_dir."/mapability.bedGraph");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 #while(<FILE>){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 # chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 # my ($cur_chr,$start,$end,$score)=split();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 # next if($cur_chr ne $chr);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 #for(my $i=$start; $i<$end; $i++){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93 # if(defined($mappability{$i})){ $mappability{$i}=$score; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 # }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96 #close FILE;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 my $folder;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 if($is_male){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 $folder = "male";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 $folder = "female";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 #open(FILE,$work_dir."/dnase/".$folder."/".$dnase);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105 open(FILE,"gzip -dc ${work_dir}/dnase/${folder}/${dnase}".'AlnRep*.bam.unique.tagAlign.gz |');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 while(<FILE>){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108 my ($cur_chr,$start,$end,undef,undef,$strand)=split("\t");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109 next if(($cur_chr ne $chr) || !defined($datap{$start}));
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 if($strand eq '+'){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 $datap{$start}++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 $datan{$end-1}++;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 close FILE;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 foreach my $start (sort keys %{$motifs{$chr}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 foreach my $strand (keys %{$motifs{$chr}{$start}}){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120 my $score = $motifs{$chr}{$start}{$strand};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 #filter out those that have no mappability in more than 20% of their extension...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 #my $count = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 #for(my $i=$start-$hlen;$i<$start+$hlen+$motif_size;$i++){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 # if($mappability{$i} <= 0.5){ $count++; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 #Remove if more than 20% of region is "non_mappable" (approach from Centipede paper)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 #if(($count / (2*$hlen + $motif_size)) > 0.2){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 # warn "Motif at chr: $chr start: $start strand: $strand was ignored due to low mappability";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 # next;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 #}
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 print FO $chr."\t".$start."\t".($start+$motif_size)."\t".$matrix."\t".$score."\t".$strand;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 for(my $i=$start-$hlen;$i<$start+$hlen+$motif_size;$i++){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 if($strand eq '+'){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 print FO "\t".$datap{$i};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 print FO "\t".$datan{$i};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 for(my $i=$start-$hlen;$i<$start+$hlen+$motif_size;$i++){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142 if($strand eq '+'){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 print FO "\t".$datan{$i};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 } else {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 print FO "\t".$datap{$i};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 print FO "\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 close FO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 sub write_output { # Nothing to do here
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 my $self = shift @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 $self->dataflow_output_id($self->input_id, 2);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 1;