1
|
1 #:t:::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
2 #:t::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
3 #:::::::::::::z;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
4 #::::::::::::i@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
5 #::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@$@@@@
|
|
6 #:::::::::::3@@@@@@@@@@@@@@@@@@@@@@@@@B@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
7 #::::::::::3@@@@@@@@@@@@@@@@@@@@@BEEESSE5EEEEBBM@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
8 #::::::::::3@@@@@@@@@@@@@@@@@@@@BEEEEEE35EE55E2355E5SBMB@@@@@@@@@@@@@@@@@$
|
|
9 #::::::::::@@@@@@@@@@@@@@@@@@@EEEE55533t3tttt::::::!!!!7755E755SBBMMM@@@MM
|
|
10 #::::::::::3@@@@@@@@@@@@@@@@@@EEEE2t3ttttt:::::::::::::::::::::::!7?5225EE
|
|
11 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEE31t::::::::::::::::::::::::::::::::3E5@
|
|
12 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEEEtt:::::::::::::::::::::::::::::::::353
|
|
13 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEEE1ttz::::::::::::::::::::::::::::::::35
|
|
14 #:::::::::::@@@@@@@@@@@@@@@@@@EEEEEEEtz1::::::::::::::::::::::::::::::::t:
|
|
15 #:::::::::!3@@@@@@@@@@@@@@@@@@@EEEEEttt::::::::::::::::::::::::::::::::;zz
|
|
16 #::::::::::@@@@@@@@@@@@@@@@@@@@EEEEEttt:::::z;z:::::::::::::::::::::::::13
|
|
17 #::::::::::3B@@@@@@@@@@@@@@@@@@EEEEEEE3tt:czzztti;:::::::::::::::::::::::3
|
|
18 #::::ttt::::3@@@@@@@@@@@@@@@@EEEEE5EE25Ezt1EEEz5Etzzz;;;;:::::::::::::::::
|
|
19 #:::::::::::I9@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEE@@@@@@@@@@@@@@Ez;:::::::::::
|
|
20 #:::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ez::::::
|
|
21 #::::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@BE5EBB@@@@@@@@@@@@@@@EEE:::::
|
|
22 #:::::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@E1::35@@@@@@@@@@ME3MMME2::::::
|
|
23 #:::::::::::::::?@@@@@@@@@@@@@@@@@@M@@@@@@@EE:::::3SB@@BBESEEt::::::::::::
|
|
24 #::::::::::::::::J$@@@@@@@B@@@@@@@@@@@@@@@@EE:::::::!35E33t:::::::::::::::
|
|
25 #:::::::::::::::::3@E@@@EE5EESE5EESE@@@@@@@Et::::::::::::tz:::::::::::::::
|
|
26 #:::::::::::::::::J@E$@EEE5133555SE@@@@@@@@Et:::::::::::::::::::::::::::::
|
|
27 #::::::::::::::::::E@E@EEEEtt3523EEE@@@@@@@E::::::::::::::::::::::::::::::
|
|
28 #:t::::::::::::::::JEE3@@@EEEEEEEEEE@@@@@@@E:::::::::t;:::::::::::::::::::
|
|
29 #:t:::::::::::::::::!5ES@EEEEEEEEES@@@@@@@@@E;:::;;;:3Ez::::::::::::::::::
|
|
30 #:t::::::::::::::::::::JE@@EEEEEEE@@@@@@@@@@@@@@@@ME!:::;:::::::::::::::::
|
|
31 #:tz::::::::::::::::::::JE@@@EEEE@@@@@@@@@@@@@@EE!:::::::t::::::::::::::::
|
|
32 #:t::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@ESBE::::::::::::::::::::::::::
|
|
33 #:::::::::::::::::::::::::Q@@@@@@@@@@@@@@@@EE3EE;:::::zzzz::::::::::::::::
|
|
34 #:::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@@@@NN@@@@@@Ez:::::::::::::::
|
|
35 #:zt:::::::::::::::::::::::3@@@@EE@@@@@@@@@@EEEEt::;z113E5t:::::::::::::::
|
|
36 #::tt:::::::::::::::::::::::3@@@E@@@@@@@@@@@@@@@@BEt::::::::::::::::t:::::
|
|
37 #:tt:t:::::::::::::::::::::::?S@@@@@@@@@@@BBEEE51!::::::::::::::zzzEt:::::
|
|
38 #::::::::::::::::::::::::::::::3Q@@@@@@@BEEEEEt:::::::::::::;zz@@@EE::::::
|
|
39 #::::::::::::::::::::::::::::::::75B@@@@@EEEtt;:::::::::;zz@@@@BEEEtz:::::
|
|
40 #::::::::::::::::::::::::::::::::::::?9@@@@@@@@@@@E2Ezg@@@@@B@@@EEEE1t::::
|
|
41 #:::::::::::::::::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@E@EEEEEEEzzz::
|
|
42 #::::::::::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@EEEEEEE5ttttt
|
|
43 #:::::::::::::::::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEEEEEEEtzt
|
|
44 #::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@E@@EEEEEEEEEEEE@@@
|
|
45 #::::::::::::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE3EEEE@@@@@@@
|
|
46 #:::::::::::::::::::::;;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEt33@@@@@@@@@@
|
|
47 #:::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@E@@@@@@EEEtg@@@@@@@@@@@@
|
|
48 #::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE@@@@@@@@@@@@@@@@@@@@@@@@
|
|
49 #:::::::::::::@@@@@@@@@@@@@@@@@$@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
50 #::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
|
|
51 #
|
|
52 # Copyleft ↄ⃝ 2012 Institut Curie
|
|
53 # Author(s): Valentina Boeva, Alban Lermine (Institut Curie) 2012
|
|
54 # Contact: valentina.boeva@curie.fr, alban.lermine@curie.fr
|
|
55 # This software is distributed under the terms of the GNU General
|
|
56 # Public License, either Version 2, June 1991 or Version 3, June 2007.
|
|
57
|
|
58 #!/usr/bin/perl -w
|
|
59
|
|
60 #for a complete list of genes and a list of sites, sorts genes close to sites
|
|
61 #printDistance
|
|
62
|
|
63 #read only gene files with refSeqGenes
|
|
64 #counts only once overlapping transcripts or genes
|
|
65
|
|
66 #calculates some stats about locations of peaks
|
|
67
|
|
68 use strict;
|
|
69
|
|
70 my $SitesFilename ;
|
|
71 my $SitesFilename2 ="";
|
|
72 my $GenesFilename ;
|
|
73 my $MirFilename = "";
|
|
74 my $BIGNUMBER = 10000000;
|
|
75 my $verbose = 0;
|
|
76 my $header = 0;
|
|
77 my $noXY = 0;
|
|
78 my $minscore = 0;
|
|
79
|
|
80 my $usage = qq{
|
|
81 $0
|
|
82
|
|
83 -----------------------------
|
|
84 mandatory parameters:
|
|
85
|
|
86 -g filename file with all genes
|
|
87 -f filename file with sites in BED format
|
|
88
|
|
89 -----------------------------
|
|
90 optional parameters:
|
|
91 -v for verbose
|
|
92 -minScore minimal score
|
|
93 -mir filename file with positions of miRNA
|
|
94 -head if there is a header
|
|
95 -reg filename file with FoldChanges or expression values and annotation (should in colomn 4)
|
|
96 -noXY
|
|
97 -o filename output file
|
|
98 };
|
|
99
|
|
100 if(scalar(@ARGV) == 0){
|
|
101 print $usage;
|
|
102 exit(0);
|
|
103 }
|
|
104
|
|
105 my $ResFilename = "";
|
|
106
|
|
107 my $regFilename = "";
|
|
108
|
|
109 while(scalar(@ARGV) > 0){
|
|
110 my $this_arg = shift @ARGV;
|
|
111 if ( $this_arg eq '-h') {print "$usage\n"; exit; }
|
|
112
|
|
113 elsif ( $this_arg eq '-g') {$GenesFilename = shift @ARGV;}
|
|
114 elsif ( $this_arg eq '-f') {$SitesFilename = shift @ARGV;}
|
|
115 elsif ( $this_arg eq '-mir') {$MirFilename = shift @ARGV;}
|
|
116 elsif ( $this_arg eq '-v') {$verbose = 1;}
|
|
117 elsif ( $this_arg eq '-head') {$header = 1;}
|
|
118 elsif ( $this_arg eq '-noXY') {$noXY = 1;}
|
|
119 elsif ( $this_arg eq '-o') {$ResFilename = shift @ARGV;}
|
|
120 elsif ( $this_arg eq '-reg') {$regFilename = shift @ARGV;}
|
|
121 elsif ( $this_arg eq '-minScore') {$minscore = shift @ARGV;}
|
|
122 elsif ( $this_arg =~ m/^-/ ) { print "unknown flag: $this_arg\n";}
|
|
123 }
|
|
124
|
|
125 if ($ResFilename eq "") {
|
|
126 $ResFilename = $SitesFilename.".withGenes.txt";
|
|
127 }
|
|
128
|
|
129
|
|
130
|
|
131 #------------read expression/foldChange annotation of genes---------
|
|
132
|
|
133 my %geneReg;
|
|
134 if ( $regFilename eq ""){
|
|
135 print "you did not specify file with expression of fold change values\n";
|
|
136 }
|
|
137 else {
|
|
138 open (REG, "<$regFilename ") or die "Cannot open file $regFilename !!!!: $!";
|
|
139 #Gene median mad flag
|
|
140 #RPL41 13.60755187 0.074233841 EXPRESSED
|
|
141 #A 1110013L07Rik 0.26 up-regulated
|
|
142
|
|
143 while (<REG>) {
|
|
144 chomp;
|
|
145 my ($name, $value) = (split)[1,3];
|
|
146 $geneReg{$name} = $value;
|
|
147 }
|
|
148 close REG;
|
|
149 }
|
|
150 #-----------read genes----------------
|
|
151
|
|
152 my %genes;
|
|
153
|
|
154 my $count = 0;
|
|
155
|
|
156 open (GENES, "<$GenesFilename") or die "Cannot open file $GenesFilename!!!!: $!";
|
|
157 <GENES>;
|
|
158
|
|
159 my ($name,$chr,$strand,$leftPos,$rightPos);
|
|
160
|
|
161 while (<GENES>) {
|
|
162 chomp;
|
|
163 if (/(chr\S*)\s(\d+)\s(\d+)\s([-]?1)\s\S+\s\S+\s(\S+)/){
|
|
164 $name = $5;
|
|
165 $chr = $1;
|
|
166
|
|
167 $strand = $4;
|
|
168 if ($strand eq '1') {
|
|
169 $strand = 1;
|
|
170 }
|
|
171 else {
|
|
172 $strand = -1;
|
|
173 }
|
|
174 $leftPos = $2;
|
|
175 $rightPos = $3;
|
|
176 next if (($chr =~ m/[XY]/)&&($noXY));
|
|
177 next if ($name =~ m/MIR/);
|
|
178 } elsif (/(chr\S*)\s([+-])\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\S+)\s(\S+)\s\S+\s(\S+)/) {
|
|
179 $name = $10;
|
|
180 $chr = $1;
|
|
181 $strand = $2;
|
|
182 $leftPos = $3;
|
|
183 $rightPos = $4;
|
|
184 next if (($chr =~ m/[XY]/)&&($noXY));
|
|
185 next if ($name =~ m/MIR/);
|
|
186
|
|
187 if ($strand eq '+') {
|
|
188 $strand = 1;
|
|
189 }
|
|
190 else {
|
|
191 $strand = -1;
|
|
192 }
|
|
193 } else {
|
|
194 print "Wrong type: ",$_,"\n";
|
|
195 }
|
|
196 my $ID = "$name\t$chr:$leftPos-$rightPos";
|
|
197 my $reg = "NA";
|
|
198 if (exists($geneReg{$name})) {
|
|
199 $reg = $geneReg{$name};
|
|
200 }
|
|
201 unless (exists($genes{$chr})) {
|
|
202 my %h;
|
|
203 $genes{$chr} = \%h;
|
|
204 }
|
|
205 unless (exists($genes{$chr}->{$ID})) {
|
|
206 my %h1;
|
|
207 $genes{$chr}->{$ID} = \%h1;
|
|
208 $count++;
|
|
209 }
|
|
210 $genes{$chr}->{$ID}{'name'} = $name ;
|
|
211 $genes{$chr}->{$ID}{'left'} = $leftPos ;
|
|
212 $genes{$chr}->{$ID}{'right'} = $rightPos ;
|
|
213
|
|
214 $genes{$chr}->{$ID}{'strand'} = $strand;
|
|
215
|
|
216 $genes{$chr}->{$ID}{'TSS'} = ($strand == 1) ? $leftPos :$rightPos ;
|
|
217 $genes{$chr}->{$ID}{'TE'} = ($strand == -1) ? $leftPos :$rightPos ;
|
|
218 $genes{$chr}->{$ID}{'reg'} = $reg;
|
|
219 $genes{$chr}->{$ID}{'length'} = abs ($leftPos-$rightPos);
|
|
220 $genes{$chr}->{$ID}{'closestPicDist'} = $BIGNUMBER;
|
|
221 $genes{$chr}->{$ID}{'closestPositivePicDist'} = $BIGNUMBER;
|
|
222 $genes{$chr}->{$ID}{'closestNegativePicDist'} = -$BIGNUMBER;
|
|
223
|
|
224 $genes{$chr}->{$ID}{'closestPicDistTE'} = $BIGNUMBER;
|
|
225
|
|
226 }
|
|
227 print "Total genes : $count\n";
|
|
228
|
|
229
|
|
230
|
|
231 close GENES;
|
|
232 print "\t\t$GenesFilename is read!\n";
|
|
233 #for my $gName (sort keys %{$genes{'chr18'}}) {
|
|
234
|
|
235 # print "$gName\t$genes{'chr18'}->{$gName}{'TSS'}\n";
|
|
236 #}
|
|
237
|
|
238 #-----------read file with sites miRNA, store as genes-----
|
|
239
|
|
240 if ( $MirFilename eq ""){
|
|
241 print "you did not specify file with miRNA\n";
|
|
242 }
|
|
243 else {
|
|
244
|
|
245
|
|
246 open (MIR, "<$MirFilename ") or die "Cannot open file $MirFilename !!!!: $!";
|
|
247 #chr1 20669090 20669163 mmu-mir-206 960 +
|
|
248 while (<MIR>) {
|
|
249 chomp;
|
|
250 my ($name, $chr, $leftPos, $rightPos, $strand );
|
|
251 #1 . miRNA 20669091 20669163 . + . ACC="MI0000249"; ID="mmu-mir-206";
|
|
252 if (/([0-9XYM]+)\s.\smiRNA\s(\d+)\s(\d+)\s.\s([+-])\s.\sACC=.*ID=\"(.*)\"/) {
|
|
253 $name = $5;
|
|
254 $chr = $1;
|
|
255 $leftPos = $2;
|
|
256 $rightPos = $3;
|
|
257 $strand = $4;
|
|
258 }
|
|
259 elsif (/(.*)\s(\d+)\s(\d+)\s(.*)\s(.*)\s(.*)/){
|
|
260 $name = $4;
|
|
261 $chr = $1;
|
|
262 $leftPos = $2;
|
|
263 $rightPos = $3;
|
|
264 $strand = $6;
|
|
265 } else {
|
|
266 next;
|
|
267 }
|
|
268
|
|
269 unless ($chr =~ m/chr/) {
|
|
270 $chr = "chr".$chr;
|
|
271 }
|
|
272 my $ID = "$name\t$chr:$leftPos-$rightPos";
|
|
273
|
|
274 if ($strand eq '+') {
|
|
275 $strand = 1;
|
|
276 }
|
|
277 else {
|
|
278 $strand = -1;
|
|
279 }
|
|
280
|
|
281 unless (exists($genes{$chr})) {
|
|
282 my %h;
|
|
283 $genes{$chr} = \%h;
|
|
284 }
|
|
285 unless (exists($genes{$chr}->{$ID})) {
|
|
286 my %h1;
|
|
287 $genes{$chr}->{$ID} = \%h1;
|
|
288 $count++;
|
|
289 }
|
|
290 $genes{$chr}->{$ID}{'name'} = $name ;
|
|
291 $genes{$chr}->{$ID}{'left'} = $leftPos ;
|
|
292 $genes{$chr}->{$ID}{'right'} = $rightPos ;
|
|
293 $genes{$chr}->{$ID}{'cdsStart'} = $leftPos ;
|
|
294 $genes{$chr}->{$ID}{'cdsEnd'} = $rightPos ;
|
|
295 $genes{$chr}->{$ID}{'strand'} = $strand;
|
|
296 $genes{$chr}->{$ID}{'length'} = abs ($leftPos-$rightPos);
|
|
297 $genes{$chr}->{$ID}{'exonCount'} = 1;
|
|
298 $genes{$chr}->{$ID}{'exonStarts'} = $leftPos ;
|
|
299 $genes{$chr}->{$ID}{'exonEnds'} = $rightPos ;
|
|
300 $genes{$chr}->{$ID}{'TSS'} = ($strand == 1) ? $leftPos :$rightPos ;
|
|
301 $genes{$chr}->{$ID}{'TE'} = ($strand == -1) ? $leftPos :$rightPos ;
|
|
302 $genes{$chr}->{$ID}{'reg'} = "miRNA";
|
|
303 ($genes{$chr}->{$ID}{'firstIntronStart'},$genes{$chr}->{$ID}{'firstIntronEnd'}) = (0,0);
|
|
304 $genes{$chr}->{$ID}{'closestPicDist'} = $BIGNUMBER;
|
|
305 $genes{$chr}->{$ID}{'closestPicDistTE'} = $BIGNUMBER;
|
|
306 $genes{$chr}->{$ID}{'closestPositivePicDist'} = $BIGNUMBER;
|
|
307 $genes{$chr}->{$ID}{'closestNegativePicDist'} = -$BIGNUMBER;
|
|
308
|
|
309
|
|
310 }
|
|
311
|
|
312
|
|
313 close MIR;
|
|
314 print "\t\t$MirFilename is read!\n" if ($verbose) ;
|
|
315 }
|
|
316
|
|
317
|
|
318
|
|
319 #-----------read file with sites and find overlapping genes-----
|
|
320 my $numberOfAllSites = 0;
|
|
321
|
|
322 my $lengthLosses = 0;
|
|
323 my $lengthGains = 0;
|
|
324
|
|
325 open (FILE, "<$SitesFilename") or die "Cannot open file $SitesFilename!!!!: $!";
|
|
326 open (OUT , ">$ResFilename") or die "Cannot open file $ResFilename!!!!: $!";
|
|
327 print OUT "chrom start end max_coord score Dist DistTE geneName geneCoord Reg\n";
|
|
328 open (GENESOUT , ">$ResFilename.genes") or die "Cannot open file $ResFilename.genes!!!!: $!";
|
|
329
|
|
330 print GENESOUT "Name\tCoord\tDist\tDistTE\tReg\n";
|
|
331
|
|
332 my $correction = 0;
|
|
333
|
|
334 if ($header) {
|
|
335 my $headerString = <FILE>; #read header
|
|
336 my @a = split (/\t/,$header );
|
|
337
|
|
338 if ($a[1] =~m/chr/) {
|
|
339 $correction=1;
|
|
340 }
|
|
341 }
|
|
342
|
|
343 $count = 0;
|
|
344 print $SitesFilename, "\n";
|
|
345 while (<FILE>) {
|
|
346 chomp;
|
|
347 next if (/track/);
|
|
348 my $string = $_;
|
|
349 chomp $string;
|
|
350 $numberOfAllSites++;
|
|
351 my @a = split /\t/;
|
|
352 if (($a[1] =~m/chr/)&&($correction==0)) {
|
|
353 $correction=1;
|
|
354 }
|
|
355 my $chro = $a[0+$correction];
|
|
356 next if (($chro =~ m/[XY]/)&&($noXY));
|
|
357 unless ($chro =~ m/chr/) {
|
|
358 $chro = "chr".$chro;
|
|
359 }
|
|
360
|
|
361 my $fpos = $a[1+$correction];
|
|
362 my $lpos = $a[2+$correction];
|
|
363 my $score = $a[4+$correction];
|
|
364 if ($minscore>0) {
|
|
365 next if ($score<$minscore);
|
|
366 }
|
|
367 next if ($chro=~m/M/);
|
|
368
|
|
369 print OUT $string, "\t";
|
|
370
|
|
371 findClosestTSS ($fpos,$lpos,$chro,\%genes);
|
|
372
|
|
373 $count ++;
|
|
374 }
|
|
375 close FILE;
|
|
376 close OUT ;
|
|
377 close GENESOUT;
|
|
378 print "Total Sites: $count\n";
|
|
379
|
|
380 open (GENESOUT , ">$ResFilename.genes.ClosestPeakDist") or die "Cannot open file $ResFilename.genes.ClosestPeakDist!!!!: $!";
|
|
381 print GENESOUT "Name Coord Dist DistTE Reg posDist negDist\n";
|
|
382 for my $chro (keys %genes) {
|
|
383 for my $gName (keys %{$genes{$chro}}) {
|
|
384 print GENESOUT "$gName\t",$genes{$chro}->{$gName}{'closestPicDist'},"\t",$genes{$chro}->{$gName}{'closestPicDistTE'},"\t",$genes{$chro}->{$gName}{'reg'},"\t",$genes{$chro}->{$gName}{'closestPositivePicDist'},"\t",$genes{$chro}->{$gName}{'closestNegativePicDist'},"\n";
|
|
385 }
|
|
386 }
|
|
387 close GENESOUT;
|
|
388
|
|
389 #my @arr = @{$genes{'chr'}};
|
|
390
|
|
391 sub overlapGenes {
|
|
392 my ($otherGene,$bestGene,$chro,$genes)= @_;
|
|
393 my $a1 = $genes->{$chro}{$otherGene}{'left'};
|
|
394 my $a2 = $genes->{$chro}{$bestGene}{'left'};
|
|
395 my $e1 = $genes->{$chro}{$otherGene}{'right'};
|
|
396 my $e2 = $genes->{$chro}{$bestGene}{'right'};
|
|
397 if (($a1 >= $a2)&&($a1 <= $e2)) {
|
|
398 return 1;
|
|
399 }
|
|
400 if (($a2 >= $a1)&&($a2 <= $e1)) {
|
|
401 return 1;
|
|
402 }
|
|
403 return 0;
|
|
404 }
|
|
405
|
|
406 sub overlap {
|
|
407 my ($a1,$a2,$e1,$e2)= @_; #e for genes
|
|
408
|
|
409 if ($a1 > $a2) {
|
|
410 ($a1,$a2) = ($a2,$a1);
|
|
411 }
|
|
412
|
|
413 if ($e1 > $e2) {
|
|
414 ($e1,$e2) = ($e2,$e1);
|
|
415 }
|
|
416
|
|
417
|
|
418 if (($a1 >= $e1)&&($a1 <= $e2)) {
|
|
419 return 2;
|
|
420 }
|
|
421 if (($a2 >= $e1)&&($a2 <= $e2)) {
|
|
422 return 2;
|
|
423 }
|
|
424 if (($e1 >= $a1)&&($e2 <= $a2)) {
|
|
425 return 1;
|
|
426 }
|
|
427
|
|
428 return 0;
|
|
429 }
|
|
430
|
|
431
|
|
432 sub findClosestTSS {
|
|
433 my ($fpos,$lpos,$chro,$genes) = @_;
|
|
434 my $pos = ($fpos+$lpos)/2;
|
|
435 my $gene = "";
|
|
436 my $tssDist=$BIGNUMBER;
|
|
437 my $teDist=$BIGNUMBER;
|
|
438
|
|
439 for my $gName (keys %{$genes->{$chro}}) {
|
|
440 my $TSS = $genes->{$chro}{$gName}{'TSS'};
|
|
441 #print $gName,":",$TSS," " if ($chro eq "chr15");
|
|
442 if (abs($pos-$TSS)<abs($tssDist)) {
|
|
443 $gene=$gName;
|
|
444 $tssDist = ($pos-$TSS)*$genes->{$chro}{$gName}{'strand'};
|
|
445 $teDist = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
|
|
446 }
|
|
447 if (abs($pos-$TSS)<$BIGNUMBER) {
|
|
448 print GENESOUT "$gName\t",($pos-$TSS)*$genes->{$chro}{$gName}{'strand'},"\t",($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'},"\t",$genes->{$chro}{$gName}{'reg'},"\n";
|
|
449
|
|
450 if (abs($pos-$TSS)<abs($genes->{$chro}{$gName}{'closestPicDist'})) {
|
|
451 $genes->{$chro}{$gName}{'closestPicDist'} = ($pos-$TSS)*$genes->{$chro}{$gName}{'strand'};
|
|
452 $genes->{$chro}{$gName}{'closestPicDistTE'} = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
|
|
453 }
|
|
454 if (($pos-$TSS)*$genes->{$chro}{$gName}{'strand'}>=0 && abs($pos-$TSS)<$genes{$chro}->{$gName}{'closestPositivePicDist'} ) {
|
|
455 $genes->{$chro}{$gName}{'closestPositivePicDist'} = ($pos-$TSS)*$genes->{$chro}{$gName}{'strand'};
|
|
456 }
|
|
457 if (($pos-$TSS)*$genes->{$chro}{$gName}{'strand'}<=0 && -1*abs($pos-$TSS)>$genes{$chro}->{$gName}{'closestNegativePicDist'} ) {
|
|
458 $genes->{$chro}{$gName}{'closestNegativePicDist'} = ($pos-$TSS)*$genes->{$chro}{$gName}{'strand'};
|
|
459 }
|
|
460
|
|
461 }
|
|
462 }
|
|
463 print OUT "$tssDist\t$teDist\t$gene\t$genes{$chro}->{$gene}{'reg'}\n" if ($gene ne "");
|
|
464 if ($gene eq "") {
|
|
465 print OUT "$tssDist $teDist NA NA NA\n";
|
|
466 }
|
|
467
|
|
468 }
|
|
469
|
|
470 sub printOvelapingGenes {
|
|
471 my ($fpos,$lpos,$chro,$genes,$globalH,$type) = @_;
|
|
472 #print $fpos,$lpos,$chro,$genes,$globalH,$type,"\n";
|
|
473 my %hash1;
|
|
474 my %hash2;
|
|
475 my @array2return;
|
|
476 for my $gName (keys %{$genes->{$chro}}) {
|
|
477 my $TSS = $genes->{$chro}{$gName}{'TSS'};
|
|
478 my $TE = $genes->{$chro}{$gName}{'TE'};
|
|
479
|
|
480 if (overlap($fpos,$lpos,$TSS,$TE)==1) {
|
|
481 $hash1{$gName} = overlap($fpos,$lpos,$TSS,$TE);
|
|
482 }
|
|
483 if (overlap($fpos,$lpos,$TSS,$TE)==2) {
|
|
484 $hash2{$gName} = overlap($fpos,$lpos,$TSS,$TE);
|
|
485 }
|
|
486 }
|
|
487 print OUT join(",", sort keys %hash1),"\t",join(",", sort keys %hash2),"\n";
|
|
488 if ($type eq "loss") {
|
|
489 for my $key (sort keys %hash1) {
|
|
490 $globalH->{$key} = 1;
|
|
491 }
|
|
492 for my $key (sort keys %hash2) {
|
|
493 $globalH->{$key} = 1;
|
|
494 }
|
|
495 }else {
|
|
496 for my $key (sort keys %hash1) {
|
|
497 $globalH->{$key} = 1;
|
|
498 }
|
|
499 }
|
|
500
|
|
501 }
|
|
502
|
|
503 sub min {
|
|
504 return $_[0] if ($_[0]<$_[1]);
|
|
505 $_[1];
|
|
506 }
|