annotate crossBedWithGenes.pl @ 5:10c7433a68b4 draft

Uploaded
author jbrayet
date Fri, 18 Dec 2015 04:47:31 -0500
parents 2a1425b5f4eb
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
1 #:t:::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
2 #:t::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
3 #:::::::::::::z;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
4 #::::::::::::i@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
5 #::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@$@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
6 #:::::::::::3@@@@@@@@@@@@@@@@@@@@@@@@@B@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
7 #::::::::::3@@@@@@@@@@@@@@@@@@@@@BEEESSE5EEEEBBM@@@@@@@@@@@@@@@@@@@@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
8 #::::::::::3@@@@@@@@@@@@@@@@@@@@BEEEEEE35EE55E2355E5SBMB@@@@@@@@@@@@@@@@@$
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
9 #::::::::::@@@@@@@@@@@@@@@@@@@EEEE55533t3tttt::::::!!!!7755E755SBBMMM@@@MM
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
10 #::::::::::3@@@@@@@@@@@@@@@@@@EEEE2t3ttttt:::::::::::::::::::::::!7?5225EE
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
11 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEE31t::::::::::::::::::::::::::::::::3E5@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
12 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEEEtt:::::::::::::::::::::::::::::::::353
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
13 #::::::::::3@@@@@@@@@@@@@@@@@@EEEEEE1ttz::::::::::::::::::::::::::::::::35
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
14 #:::::::::::@@@@@@@@@@@@@@@@@@EEEEEEEtz1::::::::::::::::::::::::::::::::t:
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
15 #:::::::::!3@@@@@@@@@@@@@@@@@@@EEEEEttt::::::::::::::::::::::::::::::::;zz
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
16 #::::::::::@@@@@@@@@@@@@@@@@@@@EEEEEttt:::::z;z:::::::::::::::::::::::::13
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
17 #::::::::::3B@@@@@@@@@@@@@@@@@@EEEEEEE3tt:czzztti;:::::::::::::::::::::::3
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
18 #::::ttt::::3@@@@@@@@@@@@@@@@EEEEE5EE25Ezt1EEEz5Etzzz;;;;:::::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
19 #:::::::::::I9@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEE@@@@@@@@@@@@@@Ez;:::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
20 #:::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ez::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
21 #::::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@BE5EBB@@@@@@@@@@@@@@@EEE:::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
22 #:::::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@E1::35@@@@@@@@@@ME3MMME2::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
23 #:::::::::::::::?@@@@@@@@@@@@@@@@@@M@@@@@@@EE:::::3SB@@BBESEEt::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
24 #::::::::::::::::J$@@@@@@@B@@@@@@@@@@@@@@@@EE:::::::!35E33t:::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
25 #:::::::::::::::::3@E@@@EE5EESE5EESE@@@@@@@Et::::::::::::tz:::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
26 #:::::::::::::::::J@E$@EEE5133555SE@@@@@@@@Et:::::::::::::::::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
27 #::::::::::::::::::E@E@EEEEtt3523EEE@@@@@@@E::::::::::::::::::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
28 #:t::::::::::::::::JEE3@@@EEEEEEEEEE@@@@@@@E:::::::::t;:::::::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
29 #:t:::::::::::::::::!5ES@EEEEEEEEES@@@@@@@@@E;:::;;;:3Ez::::::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
30 #:t::::::::::::::::::::JE@@EEEEEEE@@@@@@@@@@@@@@@@ME!:::;:::::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
31 #:tz::::::::::::::::::::JE@@@EEEE@@@@@@@@@@@@@@EE!:::::::t::::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
32 #:t::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@ESBE::::::::::::::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
33 #:::::::::::::::::::::::::Q@@@@@@@@@@@@@@@@EE3EE;:::::zzzz::::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
34 #:::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@@@@NN@@@@@@Ez:::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
35 #:zt:::::::::::::::::::::::3@@@@EE@@@@@@@@@@EEEEt::;z113E5t:::::::::::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
36 #::tt:::::::::::::::::::::::3@@@E@@@@@@@@@@@@@@@@BEt::::::::::::::::t:::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
37 #:tt:t:::::::::::::::::::::::?S@@@@@@@@@@@BBEEE51!::::::::::::::zzzEt:::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
38 #::::::::::::::::::::::::::::::3Q@@@@@@@BEEEEEt:::::::::::::;zz@@@EE::::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
39 #::::::::::::::::::::::::::::::::75B@@@@@EEEtt;:::::::::;zz@@@@BEEEtz:::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
40 #::::::::::::::::::::::::::::::::::::?9@@@@@@@@@@@E2Ezg@@@@@B@@@EEEE1t::::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
41 #:::::::::::::::::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@E@EEEEEEEzzz::
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
42 #::::::::::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@EEEEEEE5ttttt
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
43 #:::::::::::::::::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEEEEEEEtzt
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
44 #::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@E@@EEEEEEEEEEEE@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
45 #::::::::::::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE3EEEE@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
46 #:::::::::::::::::::::;;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEt33@@@@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
47 #:::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@E@@@@@@EEEtg@@@@@@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
48 #::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE@@@@@@@@@@@@@@@@@@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
49 #:::::::::::::@@@@@@@@@@@@@@@@@$@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
50 #::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
51 #
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
52 # Copyleft ↄ⃝ 2012 Institut Curie
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
53 # Author(s): Valentina Boeva, Alban Lermine (Institut Curie) 2012
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
54 # Contact: valentina.boeva@curie.fr, alban.lermine@curie.fr
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
55 # This software is distributed under the terms of the GNU General
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
56 # Public License, either Version 2, June 1991 or Version 3, June 2007.
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
57
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
58 #!/usr/bin/perl -w
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
59
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
60 #for a complete list of genes and a list of sites, sorts genes close to sites
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
61 #printDistance
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
62
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
63 #read only gene files with refSeqGenes
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
64 #counts only once overlapping transcripts or genes
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
65
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
66 #calculates some stats about locations of peaks
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
67
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
68 use strict;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
69
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
70 my $SitesFilename ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
71 my $SitesFilename2 ="";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
72 my $GenesFilename ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
73 my $MirFilename = "";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
74 my $BIGNUMBER = 10000000;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
75 my $verbose = 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
76 my $header = 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
77 my $noXY = 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
78 my $minscore = 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
79
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
80 my $usage = qq{
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
81 $0
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
82
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
83 -----------------------------
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
84 mandatory parameters:
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
85
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
86 -g filename file with all genes
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
87 -f filename file with sites in BED format
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
88
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
89 -----------------------------
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
90 optional parameters:
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
91 -v for verbose
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
92 -minScore minimal score
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
93 -mir filename file with positions of miRNA
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
94 -head if there is a header
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
95 -reg filename file with FoldChanges or expression values and annotation (should in colomn 4)
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
96 -noXY
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
97 -o filename output file
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
98 };
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
99
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
100 if(scalar(@ARGV) == 0){
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
101 print $usage;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
102 exit(0);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
103 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
104
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
105 my $ResFilename = "";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
106
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
107 my $regFilename = "";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
108
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
109 while(scalar(@ARGV) > 0){
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
110 my $this_arg = shift @ARGV;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
111 if ( $this_arg eq '-h') {print "$usage\n"; exit; }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
112
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
113 elsif ( $this_arg eq '-g') {$GenesFilename = shift @ARGV;}
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
114 elsif ( $this_arg eq '-f') {$SitesFilename = shift @ARGV;}
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
115 elsif ( $this_arg eq '-mir') {$MirFilename = shift @ARGV;}
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
116 elsif ( $this_arg eq '-v') {$verbose = 1;}
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
117 elsif ( $this_arg eq '-head') {$header = 1;}
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
118 elsif ( $this_arg eq '-noXY') {$noXY = 1;}
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
119 elsif ( $this_arg eq '-o') {$ResFilename = shift @ARGV;}
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
120 elsif ( $this_arg eq '-reg') {$regFilename = shift @ARGV;}
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
121 elsif ( $this_arg eq '-minScore') {$minscore = shift @ARGV;}
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
122 elsif ( $this_arg =~ m/^-/ ) { print "unknown flag: $this_arg\n";}
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
123 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
124
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
125 if ($ResFilename eq "") {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
126 $ResFilename = $SitesFilename.".withGenes.txt";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
127 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
128
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
129
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
130
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
131 #------------read expression/foldChange annotation of genes---------
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
132
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
133 my %geneReg;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
134 if ( $regFilename eq ""){
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
135 print "you did not specify file with expression of fold change values\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
136 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
137 else {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
138 open (REG, "<$regFilename ") or die "Cannot open file $regFilename !!!!: $!";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
139 #Gene median mad flag
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
140 #RPL41 13.60755187 0.074233841 EXPRESSED
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
141 #A 1110013L07Rik 0.26 up-regulated
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
142
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
143 while (<REG>) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
144 chomp;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
145 my ($name, $value) = (split)[1,3];
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
146 $geneReg{$name} = $value;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
147 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
148 close REG;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
149 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
150 #-----------read genes----------------
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
151
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
152 my %genes;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
153
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
154 my $count = 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
155
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
156 open (GENES, "<$GenesFilename") or die "Cannot open file $GenesFilename!!!!: $!";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
157 <GENES>;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
158
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
159 my ($name,$chr,$strand,$leftPos,$rightPos);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
160
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
161 while (<GENES>) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
162 chomp;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
163 if (/(chr\S*)\s(\d+)\s(\d+)\s([-]?1)\s\S+\s\S+\s(\S+)/){
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
164 $name = $5;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
165 $chr = $1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
166
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
167 $strand = $4;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
168 if ($strand eq '1') {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
169 $strand = 1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
170 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
171 else {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
172 $strand = -1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
173 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
174 $leftPos = $2;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
175 $rightPos = $3;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
176 next if (($chr =~ m/[XY]/)&&($noXY));
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
177 next if ($name =~ m/MIR/);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
178 } elsif (/(chr\S*)\s([+-])\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\S+)\s(\S+)\s\S+\s(\S+)/) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
179 $name = $10;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
180 $chr = $1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
181 $strand = $2;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
182 $leftPos = $3;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
183 $rightPos = $4;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
184 next if (($chr =~ m/[XY]/)&&($noXY));
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
185 next if ($name =~ m/MIR/);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
186
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
187 if ($strand eq '+') {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
188 $strand = 1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
189 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
190 else {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
191 $strand = -1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
192 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
193 } else {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
194 print "Wrong type: ",$_,"\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
195 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
196 my $ID = "$name\t$chr:$leftPos-$rightPos";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
197 my $reg = "NA";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
198 if (exists($geneReg{$name})) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
199 $reg = $geneReg{$name};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
200 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
201 unless (exists($genes{$chr})) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
202 my %h;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
203 $genes{$chr} = \%h;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
204 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
205 unless (exists($genes{$chr}->{$ID})) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
206 my %h1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
207 $genes{$chr}->{$ID} = \%h1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
208 $count++;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
209 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
210 $genes{$chr}->{$ID}{'name'} = $name ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
211 $genes{$chr}->{$ID}{'left'} = $leftPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
212 $genes{$chr}->{$ID}{'right'} = $rightPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
213
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
214 $genes{$chr}->{$ID}{'strand'} = $strand;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
215
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
216 $genes{$chr}->{$ID}{'TSS'} = ($strand == 1) ? $leftPos :$rightPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
217 $genes{$chr}->{$ID}{'TE'} = ($strand == -1) ? $leftPos :$rightPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
218 $genes{$chr}->{$ID}{'reg'} = $reg;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
219 $genes{$chr}->{$ID}{'length'} = abs ($leftPos-$rightPos);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
220 $genes{$chr}->{$ID}{'closestPicDist'} = $BIGNUMBER;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
221 $genes{$chr}->{$ID}{'closestPositivePicDist'} = $BIGNUMBER;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
222 $genes{$chr}->{$ID}{'closestNegativePicDist'} = -$BIGNUMBER;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
223
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
224 $genes{$chr}->{$ID}{'closestPicDistTE'} = $BIGNUMBER;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
225
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
226 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
227 print "Total genes : $count\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
228
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
229
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
230
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
231 close GENES;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
232 print "\t\t$GenesFilename is read!\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
233 #for my $gName (sort keys %{$genes{'chr18'}}) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
234
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
235 # print "$gName\t$genes{'chr18'}->{$gName}{'TSS'}\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
236 #}
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
237
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
238 #-----------read file with sites miRNA, store as genes-----
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
239
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
240 if ( $MirFilename eq ""){
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
241 print "you did not specify file with miRNA\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
242 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
243 else {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
244
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
245
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
246 open (MIR, "<$MirFilename ") or die "Cannot open file $MirFilename !!!!: $!";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
247 #chr1 20669090 20669163 mmu-mir-206 960 +
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
248 while (<MIR>) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
249 chomp;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
250 my ($name, $chr, $leftPos, $rightPos, $strand );
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
251 #1 . miRNA 20669091 20669163 . + . ACC="MI0000249"; ID="mmu-mir-206";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
252 if (/([0-9XYM]+)\s.\smiRNA\s(\d+)\s(\d+)\s.\s([+-])\s.\sACC=.*ID=\"(.*)\"/) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
253 $name = $5;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
254 $chr = $1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
255 $leftPos = $2;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
256 $rightPos = $3;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
257 $strand = $4;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
258 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
259 elsif (/(.*)\s(\d+)\s(\d+)\s(.*)\s(.*)\s(.*)/){
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
260 $name = $4;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
261 $chr = $1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
262 $leftPos = $2;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
263 $rightPos = $3;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
264 $strand = $6;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
265 } else {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
266 next;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
267 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
268
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
269 unless ($chr =~ m/chr/) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
270 $chr = "chr".$chr;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
271 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
272 my $ID = "$name\t$chr:$leftPos-$rightPos";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
273
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
274 if ($strand eq '+') {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
275 $strand = 1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
276 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
277 else {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
278 $strand = -1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
279 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
280
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
281 unless (exists($genes{$chr})) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
282 my %h;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
283 $genes{$chr} = \%h;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
284 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
285 unless (exists($genes{$chr}->{$ID})) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
286 my %h1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
287 $genes{$chr}->{$ID} = \%h1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
288 $count++;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
289 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
290 $genes{$chr}->{$ID}{'name'} = $name ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
291 $genes{$chr}->{$ID}{'left'} = $leftPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
292 $genes{$chr}->{$ID}{'right'} = $rightPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
293 $genes{$chr}->{$ID}{'cdsStart'} = $leftPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
294 $genes{$chr}->{$ID}{'cdsEnd'} = $rightPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
295 $genes{$chr}->{$ID}{'strand'} = $strand;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
296 $genes{$chr}->{$ID}{'length'} = abs ($leftPos-$rightPos);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
297 $genes{$chr}->{$ID}{'exonCount'} = 1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
298 $genes{$chr}->{$ID}{'exonStarts'} = $leftPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
299 $genes{$chr}->{$ID}{'exonEnds'} = $rightPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
300 $genes{$chr}->{$ID}{'TSS'} = ($strand == 1) ? $leftPos :$rightPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
301 $genes{$chr}->{$ID}{'TE'} = ($strand == -1) ? $leftPos :$rightPos ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
302 $genes{$chr}->{$ID}{'reg'} = "miRNA";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
303 ($genes{$chr}->{$ID}{'firstIntronStart'},$genes{$chr}->{$ID}{'firstIntronEnd'}) = (0,0);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
304 $genes{$chr}->{$ID}{'closestPicDist'} = $BIGNUMBER;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
305 $genes{$chr}->{$ID}{'closestPicDistTE'} = $BIGNUMBER;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
306 $genes{$chr}->{$ID}{'closestPositivePicDist'} = $BIGNUMBER;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
307 $genes{$chr}->{$ID}{'closestNegativePicDist'} = -$BIGNUMBER;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
308
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
309
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
310 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
311
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
312
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
313 close MIR;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
314 print "\t\t$MirFilename is read!\n" if ($verbose) ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
315 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
316
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
317
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
318
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
319 #-----------read file with sites and find overlapping genes-----
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
320 my $numberOfAllSites = 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
321
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
322 my $lengthLosses = 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
323 my $lengthGains = 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
324
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
325 open (FILE, "<$SitesFilename") or die "Cannot open file $SitesFilename!!!!: $!";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
326 open (OUT , ">$ResFilename") or die "Cannot open file $ResFilename!!!!: $!";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
327 print OUT "chrom start end max_coord score Dist DistTE geneName geneCoord Reg\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
328 open (GENESOUT , ">$ResFilename.genes") or die "Cannot open file $ResFilename.genes!!!!: $!";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
329
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
330 print GENESOUT "Name\tCoord\tDist\tDistTE\tReg\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
331
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
332 my $correction = 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
333
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
334 if ($header) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
335 my $headerString = <FILE>; #read header
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
336 my @a = split (/\t/,$header );
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
337
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
338 if ($a[1] =~m/chr/) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
339 $correction=1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
340 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
341 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
342
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
343 $count = 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
344 print $SitesFilename, "\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
345 while (<FILE>) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
346 chomp;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
347 next if (/track/);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
348 my $string = $_;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
349 chomp $string;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
350 $numberOfAllSites++;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
351 my @a = split /\t/;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
352 if (($a[1] =~m/chr/)&&($correction==0)) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
353 $correction=1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
354 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
355 my $chro = $a[0+$correction];
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
356 next if (($chro =~ m/[XY]/)&&($noXY));
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
357 unless ($chro =~ m/chr/) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
358 $chro = "chr".$chro;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
359 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
360
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
361 my $fpos = $a[1+$correction];
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
362 my $lpos = $a[2+$correction];
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
363 my $score = $a[4+$correction];
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
364 if ($minscore>0) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
365 next if ($score<$minscore);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
366 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
367 next if ($chro=~m/M/);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
368
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
369 print OUT $string, "\t";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
370
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
371 findClosestTSS ($fpos,$lpos,$chro,\%genes);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
372
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
373 $count ++;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
374 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
375 close FILE;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
376 close OUT ;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
377 close GENESOUT;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
378 print "Total Sites: $count\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
379
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
380 open (GENESOUT , ">$ResFilename.genes.ClosestPeakDist") or die "Cannot open file $ResFilename.genes.ClosestPeakDist!!!!: $!";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
381 print GENESOUT "Name Coord Dist DistTE Reg posDist negDist\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
382 for my $chro (keys %genes) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
383 for my $gName (keys %{$genes{$chro}}) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
384 print GENESOUT "$gName\t",$genes{$chro}->{$gName}{'closestPicDist'},"\t",$genes{$chro}->{$gName}{'closestPicDistTE'},"\t",$genes{$chro}->{$gName}{'reg'},"\t",$genes{$chro}->{$gName}{'closestPositivePicDist'},"\t",$genes{$chro}->{$gName}{'closestNegativePicDist'},"\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
385 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
386 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
387 close GENESOUT;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
388
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
389 #my @arr = @{$genes{'chr'}};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
390
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
391 sub overlapGenes {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
392 my ($otherGene,$bestGene,$chro,$genes)= @_;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
393 my $a1 = $genes->{$chro}{$otherGene}{'left'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
394 my $a2 = $genes->{$chro}{$bestGene}{'left'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
395 my $e1 = $genes->{$chro}{$otherGene}{'right'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
396 my $e2 = $genes->{$chro}{$bestGene}{'right'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
397 if (($a1 >= $a2)&&($a1 <= $e2)) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
398 return 1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
399 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
400 if (($a2 >= $a1)&&($a2 <= $e1)) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
401 return 1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
402 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
403 return 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
404 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
405
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
406 sub overlap {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
407 my ($a1,$a2,$e1,$e2)= @_; #e for genes
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
408
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
409 if ($a1 > $a2) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
410 ($a1,$a2) = ($a2,$a1);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
411 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
412
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
413 if ($e1 > $e2) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
414 ($e1,$e2) = ($e2,$e1);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
415 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
416
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
417
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
418 if (($a1 >= $e1)&&($a1 <= $e2)) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
419 return 2;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
420 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
421 if (($a2 >= $e1)&&($a2 <= $e2)) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
422 return 2;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
423 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
424 if (($e1 >= $a1)&&($e2 <= $a2)) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
425 return 1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
426 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
427
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
428 return 0;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
429 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
430
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
431
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
432 sub findClosestTSS {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
433 my ($fpos,$lpos,$chro,$genes) = @_;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
434 my $pos = ($fpos+$lpos)/2;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
435 my $gene = "";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
436 my $tssDist=$BIGNUMBER;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
437 my $teDist=$BIGNUMBER;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
438
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
439 for my $gName (keys %{$genes->{$chro}}) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
440 my $TSS = $genes->{$chro}{$gName}{'TSS'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
441 #print $gName,":",$TSS," " if ($chro eq "chr15");
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
442 if (abs($pos-$TSS)<abs($tssDist)) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
443 $gene=$gName;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
444 $tssDist = ($pos-$TSS)*$genes->{$chro}{$gName}{'strand'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
445 $teDist = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
446 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
447 if (abs($pos-$TSS)<$BIGNUMBER) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
448 print GENESOUT "$gName\t",($pos-$TSS)*$genes->{$chro}{$gName}{'strand'},"\t",($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'},"\t",$genes->{$chro}{$gName}{'reg'},"\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
449
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
450 if (abs($pos-$TSS)<abs($genes->{$chro}{$gName}{'closestPicDist'})) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
451 $genes->{$chro}{$gName}{'closestPicDist'} = ($pos-$TSS)*$genes->{$chro}{$gName}{'strand'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
452 $genes->{$chro}{$gName}{'closestPicDistTE'} = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
453 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
454 if (($pos-$TSS)*$genes->{$chro}{$gName}{'strand'}>=0 && abs($pos-$TSS)<$genes{$chro}->{$gName}{'closestPositivePicDist'} ) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
455 $genes->{$chro}{$gName}{'closestPositivePicDist'} = ($pos-$TSS)*$genes->{$chro}{$gName}{'strand'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
456 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
457 if (($pos-$TSS)*$genes->{$chro}{$gName}{'strand'}<=0 && -1*abs($pos-$TSS)>$genes{$chro}->{$gName}{'closestNegativePicDist'} ) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
458 $genes->{$chro}{$gName}{'closestNegativePicDist'} = ($pos-$TSS)*$genes->{$chro}{$gName}{'strand'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
459 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
460
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
461 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
462 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
463 print OUT "$tssDist\t$teDist\t$gene\t$genes{$chro}->{$gene}{'reg'}\n" if ($gene ne "");
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
464 if ($gene eq "") {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
465 print OUT "$tssDist $teDist NA NA NA\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
466 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
467
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
468 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
469
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
470 sub printOvelapingGenes {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
471 my ($fpos,$lpos,$chro,$genes,$globalH,$type) = @_;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
472 #print $fpos,$lpos,$chro,$genes,$globalH,$type,"\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
473 my %hash1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
474 my %hash2;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
475 my @array2return;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
476 for my $gName (keys %{$genes->{$chro}}) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
477 my $TSS = $genes->{$chro}{$gName}{'TSS'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
478 my $TE = $genes->{$chro}{$gName}{'TE'};
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
479
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
480 if (overlap($fpos,$lpos,$TSS,$TE)==1) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
481 $hash1{$gName} = overlap($fpos,$lpos,$TSS,$TE);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
482 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
483 if (overlap($fpos,$lpos,$TSS,$TE)==2) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
484 $hash2{$gName} = overlap($fpos,$lpos,$TSS,$TE);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
485 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
486 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
487 print OUT join(",", sort keys %hash1),"\t",join(",", sort keys %hash2),"\n";
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
488 if ($type eq "loss") {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
489 for my $key (sort keys %hash1) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
490 $globalH->{$key} = 1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
491 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
492 for my $key (sort keys %hash2) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
493 $globalH->{$key} = 1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
494 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
495 }else {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
496 for my $key (sort keys %hash1) {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
497 $globalH->{$key} = 1;
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
498 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
499 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
500
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
501 }
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
502
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
503 sub min {
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
504 return $_[0] if ($_[0]<$_[1]);
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
505 $_[1];
2a1425b5f4eb Uploaded
jbrayet
parents:
diff changeset
506 }