annotate variant_effect_predictor/Bio/EnsEMBL/Funcgen/RunnableDB/ClusterMotifs.pm @ 2:a5976b2dce6f

changing defualt values for ensembl database
author mahtabm
date Thu, 11 Apr 2013 17:15:42 +1000
parents 1f6dce3d34e0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
1 =pod
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
2
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
3 =head1 NAME
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
4
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
5 Bio::EnsEMBL::Funcgen::RunnableDB::ClusterMotifs
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
6
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
7 =head1 DESCRIPTION
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
8
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
9 'ClusterMotifs'
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
10
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
11 =cut
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
12
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
13
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
14 package Bio::EnsEMBL::Funcgen::RunnableDB::ClusterMotifs;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
15
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
16 use warnings;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
17 use strict;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
18
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
19 use base ('Bio::EnsEMBL::Funcgen::RunnableDB::Motif');
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
20
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
21 use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
22 use Bio::EnsEMBL::Funcgen::Utils::EFGUtils qw (run_system_cmd);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
23 use Data::Dumper;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
24
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
25 sub fetch_input { # nothing to fetch... just the parameters...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
26 my $self = shift @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
27
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
28 $self->SUPER::fetch_input();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
29
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
30 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
31 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
32
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
33 sub run {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
34 my $self = shift @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
35 my @matrices;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
36
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
37 my $base = $self->_output_dir."/".$self->_feature_set->name;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
38
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
39 my $motif_file = $base.".transfac";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
40 #Maybe check if files are empty or not there at all!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
41
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
42 #Need to clump all motif files together...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
43 run_system_cmd("cat ".$self->_output_dir."/*.tmp_TRANSFAC > ".$motif_file);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
44
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
45 eval {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
46 #RUN STAMP here to cluster all motifs... change the Jaspar directories!!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
47 my $bin_folder = $self->_bin_folder();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
48 run_system_cmd("${bin_folder}/STAMP -tf $motif_file -align SWU -cc PCC -sd ${bin_folder}/STAMP_data/Jaspar2010_1000random_PCC_SWU.scores -chp -nooverlapalign -match ${bin_folder}/STAMP_data/Jaspar_Core_PBM_PolII.Transfac -match_top 1 -ma IR -out $base");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
49
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
50 #TODO Refactor this section...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
51 my $is_top = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
52 my $top_matrix; #Is the one with smallest p
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
53 my %matrix_scores;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
54 my $min_p = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
55 open(FILE,$motif_file);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
56 while(<FILE>){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
57 my $line = $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
58 if($line =~ /^DE\s+(\S+)\s+(\S+)/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
59 my $name = $1; my $p = $2;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
60 $matrix_scores{$name} = $p;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
61 if($p < $min_p){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
62 $top_matrix = "";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
63 $is_top = 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
64 $min_p = $p;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
65 #import the top matrix...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
66 #Also check if it matches any known matrix...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
67 my $ff = $base."_match_pairs.txt";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
68 if(-e $ff){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
69 open(FIM,$ff);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
70 while(<FIM>){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
71 if(/^>\s*(\S+)\s+/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
72 if($1 eq $name){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
73 my $fline = <FIM>; chomp($fline); my ($jaspar, $score) = split(/\s+/, $fline);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
74 if($score < 0.005){ $line =~ s/\n/\t${jaspar}\n/; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
75 last;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
76 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
77 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
78 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
79 close FIM;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
80 } else { warn $ff." was not found!"; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
81 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
82 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
83 if($is_top){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
84 $top_matrix .= $line;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
85 if($line =~ /^XX/){ $is_top = 0; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
86 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
87
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
88 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
89 close FILE;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
90 if($top_matrix){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
91 push @matrices, _transfac_to_jaspar($top_matrix);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
92 } else { warn "No top matrix found!!"; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
93
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
94 #Just check what are the most similar matrices to the clusters...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
95 run_system_cmd("${bin_folder}/STAMP -tf ${base}_tree_clusters.txt -align SWU -cc PCC -sd ${bin_folder}/STAMP_data/Jaspar2010_1000random_PCC_SWU.scores -nooverlapalign -match ${bin_folder}/STAMP_data/Jaspar_Core_PBM_PolII.Transfac -match_top 1 -ma IR -out ${base}_global");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
96
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
97 #Recluster based on matrix similarity...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
98 push @matrices, @{_recluster_motifs($self->_feature_set->name, $self->_bin_folder(),$base,\%matrix_scores)};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
99 };
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
100 if ($@){ warn "No motif found: ".$@; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
101
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
102 #run_system_cmd("rm -f ".$self->_output_dir."/*.tmp_TRANSFAC");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
103
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
104 $self->_matrix_to_store(\@matrices);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
105
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
106 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
107 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
108
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
109
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
110 sub write_output { # Nothing is written at this stage (for the moment)
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
111 my $self = shift @_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
112
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
113 #Store the final matrices obtained...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
114 open(FO,">".$self->_output_dir."/".$self->_feature_set->name.".final");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
115 print FO join("\n",@{$self->_matrix_to_store()});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
116 close FO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
117
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
118 return 1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
119 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
120
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
121 #TODO Need refactoring and optimization...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
122 sub _recluster_motifs {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
123 my ($fset, $bin_folder, $base, $scores) = (shift, shift, shift, shift);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
124 my %clusters;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
125 my @result_matrix = ();
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
126
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
127 open(FILE,$base."_tree_clusters.txt");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
128 while(<FILE>){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
129 if(/^DE\s+(\S+)\s*$/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
130 #try to obtain a score here!...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
131 my $cluster_id = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
132 my $matrix = $_;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
133 while(<FILE>){ $matrix .= $_; last if(/^XX/); }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
134 my $cluster_members = <FILE>;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
135 chomp($cluster_members);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
136 $cluster_members =~ s/^XX\s+\Cluster_Members:\s+//;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
137 $clusters{$cluster_id}{"matrix"}=$matrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
138 push @{$clusters{$cluster_id}{"elements"}}, split(/\s+/,$cluster_members);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
139 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
140 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
141 close FILE;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
142
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
143 #recalculate scores
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
144 foreach my $cluster (keys %clusters){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
145 my $score = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
146 map { $score += $scores->{$_}; } @{$clusters{$cluster}{"elements"}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
147 $score = $score / scalar(@{$clusters{$cluster}{"elements"}});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
148 $clusters{$cluster}{"matrix"} =~ s/\n/\t${score}\n/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
149 $clusters{$cluster}{"score"} = $score;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
150 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
151
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
152
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
153 my %reclust;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
154 open(FILE,$base."_global_match_pairs.txt");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
155 while(<FILE>){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
156 chomp;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
157 if(/^>\s+(\S+)\s*$/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
158 my $clust_id = $1;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
159 my $match = <FILE>;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
160 chomp($match);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
161 my ($jaspar, $score, undef, undef) = split(/\s/,$match);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
162 #Make this an alpha-parameter?
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
163 if($score < 0.005){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
164 #add the matrix in the first line...
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
165 $clusters{$clust_id}{"matrix"} =~ s/\n/\t${jaspar}\n/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
166 push @{$reclust{$jaspar}}, $clust_id;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
167 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
168 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
169 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
170 close FILE;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
171
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
172 foreach my $jaspar (keys %reclust) {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
173 if(scalar(@{$reclust{$jaspar}})>1){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
174 my $score = 0;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
175 map { $score += $clusters{$_}{"score"}; } @{$reclust{$jaspar}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
176 $score = $score / scalar(@{$reclust{$jaspar}});
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
177 my $clust_file = $base."_cluster_".$jaspar;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
178 open(FO,">".$clust_file);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
179 map { print FO $clusters{$_}{"matrix"}; } @{$reclust{$jaspar}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
180 close FO;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
181 run_system_cmd("${bin_folder}/STAMP -tf $clust_file -align SWU -cc PCC -sd ${bin_folder}/STAMP_data/Jaspar2010_1000random_PCC_SWU.scores -chp -nooverlapalign -match ${bin_folder}/STAMP_data/Jaspar_Core_PBM_PolII.Transfac -match_top 1 -ma IR -out $clust_file");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
182 my $matrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
183 open(FILE,"${clust_file}FBP.txt");
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
184 #Re-add the averaged score here!
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
185 while(<FILE>){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
186 if(/^DE/){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
187 $matrix = "DE ".$fset."_cluster_${jaspar}\t".$score."\t".$jaspar."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
188 } else { $matrix .= $_; }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
189 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
190 close FILE;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
191 push @result_matrix, _transfac_to_jaspar($matrix);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
192 map { delete $clusters{$_}; } @{$reclust{$jaspar}};
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
193 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
194 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
195
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
196 map { push @result_matrix, _transfac_to_jaspar($clusters{$_}{"matrix"}); } keys %clusters;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
197
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
198 return \@result_matrix;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
199
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
200 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
201
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
202
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
203 #private function that transforms a transfac matrix to jaspar format
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
204 sub _transfac_to_jaspar{
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
205 my ($transfac) = shift;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
206 my $jaspar;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
207
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
208 my @lines = split(/\n/,$transfac);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
209 pop @lines; #XX
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
210 my $title = shift @lines;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
211 $title =~ s/^DE/>/;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
212 $jaspar = $title."\n";
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
213 my @as; my @cs; my @gs; my @ts;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
214 foreach my $line (@lines){
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
215 my (undef,$a,$c,$g,$t,undef) = split(/\s+/,$line);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
216 #convert it to integers if necessary to make it simpler after
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
217 if($a<=1 && $c<=1 && $g<=1 && $t<=1){ $a = int(100*$a); $c=int(100*$c); $g=int(100*$g); $t=int(100*$t); }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
218 push @as, $a; push @cs, $c; push @gs, $g; push @ts, $t;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
219 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
220 $jaspar .= "A [ ".join("\t",@as)." ]\n" ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
221 $jaspar .= "C [ ".join("\t",@cs)." ]\n" ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
222 $jaspar .= "G [ ".join("\t",@gs)." ]\n" ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
223 $jaspar .= "T [ ".join("\t",@ts)." ]\n" ;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
224
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
225 return $jaspar;
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
226 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
227
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
228 #Private getter / setter to the matrices
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
229 sub _matrix_to_store {
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
230 return $_[0]->_getter_setter('matrix_to_store',$_[1]);
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
231 }
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
232
1f6dce3d34e0 Uploaded
mahtabm
parents:
diff changeset
233 1;