annotate geneAnnotation_hist.pl @ 6:94be3cc05503 draft default tip

Uploaded
author jbrayet
date Tue, 05 Jan 2016 08:16:10 -0500
parents 355bc8ae2487
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
1 #!/usr/bin/perl -w
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
2 #outputs statistics for all genes in the list
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
3
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
4 #different boundaries
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
5 #no motif p-value for binding sites
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
6 #read directly No-resp/Up/down category
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
7
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
8 #all isoforms from the file with genes
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
9
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
10 #RNApolII sites on junctions
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
11
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
12 #for HISTONES!!!!
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
13
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
14 use strict;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
15 use POSIX;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
16 use warnings;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
17
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
18 my $usage = qq{
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
19 $0
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
20
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
21 -----------------------------
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
22 mandatory parameters:
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
23
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
24 -g filename file with all genes
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
25 -tf filename file with sites of TF1
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
26
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
27
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
28 -----------------------------
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
29 optional parameters:
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
30 -k36 filename file with sites of K36me3
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
31 -rp filename file with sites of RNApolII
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
32 -i filename file with a table where to add colomnes
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
33 -add values which colomns to add
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
34
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
35 -o filename output filename (defaut "genes.output.txt")
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
36 -v verbose mode
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
37 -mir filename file with positions of miRNA
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
38 -k9 filename ile with sites of K9me3
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
39
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
40 -c_rp value cutoff for -rp
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
41 -c_k9 value cutoff for -k9
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
42 -c_k36 value cutoff for -k36
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
43
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
44 -selG filename selected genes (up-down-regulated)
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
45 -fluo filename file with fluorescence
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
46 -gc filename file with gc-islands
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
47
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
48 -long for each gene take the longest isoform
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
49
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
50
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
51 };
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
52
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
53 if(scalar(@ARGV) == 0){
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
54 print $usage;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
55 exit(0);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
56 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
57
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
58 ## mandatory arguments
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
59
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
60 my $RNApolFilename = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
61 my $H3K36Me3polFilename = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
62 my $H3K9Me3polFilename = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
63 my $TF1Filename = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
64 my $TF2Filename = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
65 my $GenesFilename = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
66 my $MirFilename = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
67 my $TF1FilenameALL = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
68 my $TF2FilenameALL = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
69 my $SelectedGenesFilename = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
70 my $fluoFile = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
71 my $initialTable = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
72 my $colomnesToAdd = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
73
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
74 my $enhLeft = -30000;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
75 my $enhRight = -1500;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
76 my $immediateDownstream = 2000;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
77 my $K9dist = 5000;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
78 my $kb5 = 5000;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
79 my $INFINITY = 10000000000;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
80 my $jonctionSize = 50;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
81 ## optional arguments
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
82 my $outname = "genes.output.txt";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
83 my $verbose = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
84 my $GCislands = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
85
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
86
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
87 my $longest = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
88
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
89 #my $cutoff_tf1 = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
90 #my $cutoff_tf2 = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
91 my $cutoff_tf1All = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
92 my $cutoff_tf2All = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
93 my $cutoff_rp = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
94 my $cutoff_k9 = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
95 my $cutoff_k36 = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
96 my $ifTFcoord = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
97
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
98 my $ifHMmode = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
99
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
100 ## parse command line arguments
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
101
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
102 while(scalar(@ARGV) > 0){
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
103 my $this_arg = shift @ARGV;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
104 if ( $this_arg eq '-h') {print "$usage\n"; exit; }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
105
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
106 elsif ( $this_arg eq '-selG') {$SelectedGenesFilename = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
107
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
108 elsif ( $this_arg eq '-g') {$GenesFilename = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
109 elsif ( $this_arg eq '-rp') {$RNApolFilename = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
110 elsif ( $this_arg eq '-k36') {$H3K36Me3polFilename = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
111 elsif ( $this_arg eq '-k9') {$H3K9Me3polFilename = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
112
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
113 elsif ( $this_arg eq '-tf') {$TF1Filename = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
114
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
115 elsif ( $this_arg eq '-v') {$verbose = 1;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
116
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
117 elsif ( $this_arg eq '-long') {$longest = 1;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
118
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
119
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
120 elsif ( $this_arg eq '-o') {$outname = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
121 elsif ( $this_arg eq '-mir') {$MirFilename = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
122
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
123 elsif ( $this_arg eq '-c_rp') {$cutoff_rp = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
124 elsif ( $this_arg eq '-c_k9') {$cutoff_k9 = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
125 elsif ( $this_arg eq '-c_k36') {$cutoff_k36 = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
126
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
127 elsif ( $this_arg eq '-fluo') {$fluoFile = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
128 elsif ( $this_arg eq '-gc') {$GCislands = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
129
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
130 elsif ( $this_arg eq '-i') {$initialTable = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
131 elsif ( $this_arg eq '-add') {$colomnesToAdd = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
132
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
133 elsif ( $this_arg eq '-lp') {$enhRight = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
134 elsif ( $this_arg eq '-rightp') {$immediateDownstream = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
135 elsif ( $this_arg eq '-enh') {$enhLeft = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
136 elsif ( $this_arg eq '-dg') {$kb5 = shift @ARGV;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
137
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
138
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
139
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
140 elsif ( $this_arg =~ m/^-/ ) { print "unknown flag: $this_arg\n";}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
141 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
142
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
143 my $maxDistFromGene = max(abs($enhLeft),abs($kb5));
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
144
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
145 if ( $GenesFilename eq "" ){
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
146 die "you should specify a file with genes \n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
147 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
148 if(( $RNApolFilename eq "")&&($H3K36Me3polFilename eq "")&&($TF1Filename eq "")&&($H3K9Me3polFilename eq "")){
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
149 die "you should specify at least one file with peaks\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
150 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
151
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
152
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
153 #-----------read selected genes----------------
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
154 my %selectedGenes;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
155 my %selectedGenesFoldChange;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
156 if ( $SelectedGenesFilename ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
157 open (FILE, "<$SelectedGenesFilename ") or die "Cannot open file $SelectedGenesFilename !!!!: $!";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
158 while (<FILE>) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
159 s/\R//g;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
160 my @a = split/\t/;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
161 $selectedGenes{$a[1]} = $a[3];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
162 $selectedGenesFoldChange{$a[1]} = $a[2];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
163 #print "gene:$a[1],reg:$selectedGenes{$a[1]},FC:$selectedGenesFoldChange{$a[1]}\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
164 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
165
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
166 close FILE;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
167 print "\t\t$SelectedGenesFilename is read!\n" if ($verbose);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
168 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
169
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
170 #-----------read genes with fluorescence---------
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
171 my %fluoGenes;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
172 if ( $fluoFile ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
173 open (FILE, "<$fluoFile ") or die "Cannot open file $fluoFile !!!!: $!";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
174 my $gene = "";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
175 my $med = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
176 my %h;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
177 while (<FILE>) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
178 s/\R//g;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
179 my @a = split/\t/;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
180
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
181 next if (scalar(@a)<5);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
182 next if ($a[0] eq "probesets");
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
183 next unless ($a[0] =~m/\S/);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
184 next unless ($a[4] =~m/\S/);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
185 if ($gene ne "" && $gene ne $a[2]) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
186 #calcMed
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
187 $med = med(keys %h);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
188 $fluoGenes{$gene} = $med;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
189 $med=0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
190 delete @h{keys %h};
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
191 } else {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
192 #$h{$a[4]} = 1;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
193 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
194 $gene = $a[2];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
195 $h{$a[4]} = 1;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
196 #print "keys ", scalar(keys %h),"\t",keys %h,"\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
197 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
198 if ($gene ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
199 $med = med(keys %h);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
200 $fluoGenes{$gene} = $med;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
201 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
202 close FILE;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
203 print "\t\t$fluoFile is read!\n" if ($verbose);;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
204 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
205 #-----------read GC-islands----------------
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
206 my %GCislands;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
207 if ($GCislands ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
208 open (FILE, "<$GCislands ") or die "Cannot open file $GCislands !!!!: $!";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
209
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
210 while (<FILE>) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
211 s/\R//g;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
212 my @a = split/\t/;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
213 #bin chrom chromStart chromEnd name length cpgNum gcNum perCpg perGc obsExp
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
214 #107 chr1 36568608 36569851 CpG: 128 1243 128 766 20.6 61.6 1.09
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
215 my $chr = $a[1];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
216 my $start = $a[2];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
217 my $end = $a[3];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
218 $GCislands{$chr}->{$start}=$end;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
219 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
220 close FILE;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
221 if ($verbose) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
222 print "$GCislands is read\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
223 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
224 } elsif ($verbose) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
225 print "you did not specify a file with GC-islands\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
226 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
227
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
228 #-----------read genes----------------
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
229
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
230 my %genes;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
231
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
232 my $count = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
233
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
234 open (GENES, "<$GenesFilename") or die "Cannot open file $GenesFilename!!!!: $!";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
235 <GENES>;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
236 while (<GENES>) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
237 s/\R//g;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
238 if (/(chr.*)\s([+-])\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\S+)\s(\S+)\s\S+\s(\S+)/){
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
239 my $name = $10;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
240 my $chr = $1;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
241 my $strand = $2;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
242 if ($strand eq '+') {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
243 $strand = 1;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
244 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
245 else {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
246 $strand = -1;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
247 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
248 my $leftPos = $3;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
249 my $rightPos = $4;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
250 my $cdsStart= $5;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
251 my $cdsEnd= $6;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
252 my $exonCount= $7;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
253 my $exonStarts= $8;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
254 my $exonEnds= $9;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
255 my $ID = "$name\t$chr:$leftPos-$rightPos\t$count";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
256 my $foldChange = 1;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
257 my $reg = "NA";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
258 my $fluo = "NA";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
259 if ( $SelectedGenesFilename ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
260 #print "$name\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
261 if (exists($selectedGenes{$name})) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
262 $reg = $selectedGenes{$name};
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
263 $foldChange = $selectedGenesFoldChange{$name};
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
264 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
265 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
266 if ( $fluoFile ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
267 if (exists($fluoGenes{$name})) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
268 $fluo = $fluoGenes{$name};
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
269 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
270 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
271 unless (exists($genes{$chr})) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
272 my %h;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
273 $genes{$chr} = \%h;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
274 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
275
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
276 my $RNAlength = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
277 my $skip = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
278
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
279 #print "$ID\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
280 if($longest) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
281 $RNAlength = getRNAlength($exonStarts,$exonEnds);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
282 for my $IDgene (keys %{$genes{$chr}}) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
283 my $nameGene= (split('\t', $IDgene))[0];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
284 if ($nameGene eq $name && $RNAlength > $genes{$chr}->{$IDgene}{'RNAlength'}) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
285 #print "found longer isofome: $ID longer than $IDgene\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
286 # print "$RNAlength > ".$genes{$chr}->{$IDgene}{'RNAlength'}."\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
287
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
288 $ID=$IDgene;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
289 } elsif ($nameGene eq $name && $RNAlength <= $genes{$chr}->{$IDgene}{'RNAlength'}) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
290 #print "found shorter isofome: $ID shorted than $IDgene\nwill skip it\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
291 #print "$RNAlength <= ".$genes{$chr}->{$IDgene}{'RNAlength'}."\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
292 $skip = 1;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
293 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
294 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
295 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
296
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
297
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
298 unless ($skip) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
299
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
300 unless (exists($genes{$chr}->{$ID})) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
301 my %h1;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
302 $genes{$chr}->{$ID} = \%h1;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
303 $count++;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
304 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
305
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
306 $genes{$chr}->{$ID}{'name'} = $name ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
307 $genes{$chr}->{$ID}{'left'} = $leftPos ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
308 $genes{$chr}->{$ID}{'right'} = $rightPos ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
309 $genes{$chr}->{$ID}{'cdsStart'} = $cdsStart;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
310 $genes{$chr}->{$ID}{'cdsEnd'} = $cdsEnd;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
311 $genes{$chr}->{$ID}{'strand'} = $strand;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
312 $genes{$chr}->{$ID}{'exonCount'} = $exonCount;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
313 $genes{$chr}->{$ID}{'exonStarts'} = $exonStarts;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
314 $genes{$chr}->{$ID}{'exonEnds'} = $exonEnds;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
315 $genes{$chr}->{$ID}{'TSS'} = ($strand == 1) ? $leftPos :$rightPos ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
316 $genes{$chr}->{$ID}{'TE'} = ($strand == -1) ? $leftPos :$rightPos ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
317 $genes{$chr}->{$ID}{'reg'} = $reg;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
318 $genes{$chr}->{$ID}{'foldChange'} = $foldChange;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
319 $genes{$chr}->{$ID}{'length'} = abs ($leftPos-$rightPos);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
320 $genes{$chr}->{$ID}{'RNAlength'} = $RNAlength ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
321
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
322 my $enhStart = min($genes{$chr}->{$ID}{'TSS'}-$enhLeft*$genes{$chr}->{$ID}{'strand'},$genes{$chr}->{$ID}{'TSS'}-$enhRight*$genes{$chr}->{$ID}{'strand'});
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
323 my $enhEnd = max($genes{$chr}->{$ID}{'TSS'}-$enhLeft*$genes{$chr}->{$ID}{'strand'},$genes{$chr}->{$ID}{'TSS'}-$enhRight*$genes{$chr}->{$ID}{'strand'});
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
324 $genes{$chr}->{$ID}{'enhStart'} =$enhStart;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
325 $genes{$chr}->{$ID}{'enhEnd'} =$enhEnd;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
326
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
327 my $IntraStart = min($genes{$chr}->{$ID}{'TSS'}+$immediateDownstream*$genes{$chr}->{$ID}{'strand'},$genes{$chr}->{$ID}{'TE'});
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
328 my $IntraEnd = max($genes{$chr}->{$ID}{'TSS'}+$immediateDownstream*$genes{$chr}->{$ID}{'strand'},$genes{$chr}->{$ID}{'TE'});
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
329 $genes{$chr}->{$ID}{'IntraStart'} =$IntraStart;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
330 $genes{$chr}->{$ID}{'IntraEnd'} =$IntraEnd;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
331
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
332 $genes{$chr}->{$ID}{'fluo'} = $fluo;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
333
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
334 $genes{$chr}->{$ID}{'RNApolScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
335 $genes{$chr}->{$ID}{'RNApolDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
336 $genes{$chr}->{$ID}{'RNApol_junctionScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
337 $genes{$chr}->{$ID}{'RNApol_junctionDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
338
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
339 $genes{$chr}->{$ID}{'normalizedGBodyScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
340 $genes{$chr}->{$ID}{'K9promScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
341 $genes{$chr}->{$ID}{'K9promDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
342 $genes{$chr}->{$ID}{'K9largeScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
343 $genes{$chr}->{$ID}{'K9largeDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
344 $genes{$chr}->{$ID}{'largePromScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
345 $genes{$chr}->{$ID}{'promDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
346 $genes{$chr}->{$ID}{'EnhScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
347 $genes{$chr}->{$ID}{'EnhDistTSS'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
348 $genes{$chr}->{$ID}{'intraScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
349 $genes{$chr}->{$ID}{'intraDistTSS'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
350 $genes{$chr}->{$ID}{'allScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
351 $genes{$chr}->{$ID}{'allDistTSS'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
352
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
353
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
354 $genes{$chr}->{$ID}{'TFFirstIntronAndIntraScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
355 $genes{$chr}->{$ID}{'TFFirstIntronAndIntraDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
356 $genes{$chr}->{$ID}{'firstIntronScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
357 $genes{$chr}->{$ID}{'firstIntronDistTSS'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
358 $genes{$chr}->{$ID}{'TFintraMinusFirstIntronScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
359 $genes{$chr}->{$ID}{'TFintraMinusFirstIntronDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
360 $genes{$chr}->{$ID}{'immDownScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
361 $genes{$chr}->{$ID}{'immDownDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
362 $genes{$chr}->{$ID}{'promSimpleScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
363 $genes{$chr}->{$ID}{'promSimpleDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
364 $genes{$chr}->{$ID}{'TFenh60kbScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
365 $genes{$chr}->{$ID}{'TFenh60kbDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
366
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
367 $genes{$chr}->{$ID}{'TF_FirstExonScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
368 $genes{$chr}->{$ID}{'TF_FirstExonDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
369 $genes{$chr}->{$ID}{'TF_junctionScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
370 $genes{$chr}->{$ID}{'TF_junctionDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
371
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
372 $genes{$chr}->{$ID}{'TF_junctionAndIntraScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
373 $genes{$chr}->{$ID}{'TF_junctionAndIntraDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
374 $genes{$chr}->{$ID}{'TF_FirstExonAndIntraScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
375 $genes{$chr}->{$ID}{'TF_FirstExonAndIntraDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
376
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
377 $genes{$chr}->{$ID}{'TF_OtherExonsScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
378 $genes{$chr}->{$ID}{'TF_OtherExonsDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
379 $genes{$chr}->{$ID}{'TF_OtherExonsAndIntraScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
380 $genes{$chr}->{$ID}{'TF_OtherExonsAndIntraDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
381
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
382
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
383 $genes{$chr}->{$ID}{'TF_OtherIntronsScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
384 $genes{$chr}->{$ID}{'TF_OtherIntronsDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
385 $genes{$chr}->{$ID}{'TF_OtherIntronsAndIntraScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
386 $genes{$chr}->{$ID}{'TF_OtherIntronsAndIntraDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
387
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
388 $genes{$chr}->{$ID}{'geneDownstreamScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
389 $genes{$chr}->{$ID}{'geneDownstreamDist'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
390 $genes{$chr}->{$ID}{'K9enhScore'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
391 $genes{$chr}->{$ID}{'K9EnhDistTSS'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
392
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
393
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
394 $genes{$chr}->{$ID}{'score_GeneBody'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
395 $genes{$chr}->{$ID}{'GeneBodyPeak_DistToTSS'} = $INFINITY;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
396
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
397 $genes{$chr}->{$ID}{'GCisland'} = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
398 if ($GCislands ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
399 $genes{$chr}->{$ID}{'GCisland'} = checkIfGC ($genes{$chr}->{$ID}{'TSS'},$strand,2000,$GCislands{$chr});
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
400 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
401 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
402 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
403 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
404
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
405 print "Total genes (including isoforms) : $count\n" ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
406 close GENES;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
407 print "\t\t$GenesFilename is read!\n" if ($verbose);;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
408 #for my $gName (sort keys %{$genes{'chr18'}}) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
409
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
410 # print "$gName\t$genes{'chr18'}->{$gName}{'TSS'}\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
411 #}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
412
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
413
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
414 #-----------read file with sites of TF1-----
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
415 my $numberOfAllSites = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
416
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
417 if ($TF1Filename eq "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
418 print "No file with peaks of TF1!\n" if ($verbose) ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
419 } else {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
420 open (FILE, "<$TF1Filename ") or die "Cannot open file $TF1Filename !!!!: $!";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
421 $_ = <FILE>;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
422 my $correction = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
423 my @a = split /\t/;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
424 if (scalar(@a)>1 && $a[1] =~ m/chr/ ) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
425 $correction = 1;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
426 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
427
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
428 while (<FILE>) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
429 s/\R//g;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
430
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
431 my @a = split /\t/;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
432
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
433 my $chr = $a[0+$correction];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
434 my $firstPos = $a[1+$correction];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
435 my $LastPos = $a[2+$correction];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
436
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
437 my $maxPos = int(($firstPos+$LastPos)/2);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
438 if (scalar(@a)>(3+$correction)) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
439 $maxPos = $a[3+$correction];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
440 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
441 if ($maxPos =~ m/\D/) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
442 $maxPos = int(($firstPos+$LastPos)/2);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
443 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
444 my $score = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
445 if (scalar(@a)>(4+$correction)) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
446 $score = $a[4+$correction];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
447 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
448 for my $ID (keys %{$genes{$chr}}) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
449 my $distTSS;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
450 if ($firstPos<=$genes{$chr}->{$ID}{'TSS'} && $LastPos>=$genes{$chr}->{$ID}{'TSS'}) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
451 $distTSS = 0 ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
452 } else {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
453 $distTSS = ($firstPos - $genes{$chr}->{$ID}{'TSS'})*$genes{$chr}->{$ID}{'strand'};
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
454 $distTSS = ($LastPos - $genes{$chr}->{$ID}{'TSS'})*$genes{$chr}->{$ID}{'strand'} if (abs($LastPos - $genes{$chr}->{$ID}{'TSS'})<abs($distTSS));
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
455 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
456
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
457 my $distTE;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
458 if ($firstPos<=$genes{$chr}->{$ID}{'TE'} && $LastPos>=$genes{$chr}->{$ID}{'TE'}) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
459 $distTE = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
460 } else {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
461 $distTE = ($firstPos - $genes{$chr}->{$ID}{'TE'})*$genes{$chr}->{$ID}{'strand'};
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
462 $distTE = ($LastPos - $genes{$chr}->{$ID}{'TE'})*$genes{$chr}->{$ID}{'strand'} if (abs($LastPos - $genes{$chr}->{$ID}{'TE'})<abs($distTE));
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
463 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
464
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
465 if (($distTSS>= $enhLeft)&&($distTE<=$kb5)) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
466 if ($genes{$chr}->{$ID}{'allScore'}<$score) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
467 $genes{$chr}->{$ID}{'allScore'}=$score;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
468 $genes{$chr}->{$ID}{'allDistTSS'} = $distTSS;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
469 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
470 } else {next;}
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
471
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
472 my $hmCorDist = -1;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
473 if ($LastPos>=$genes{$chr}->{$ID}{'left'} && $firstPos <= $genes{$chr}->{$ID}{'right'} ) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
474 $hmCorDist = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
475 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
476
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
477 if ($hmCorDist == 0) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
478 if ($genes{$chr}->{$ID}{'score_GeneBody'}<$score) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
479 $genes{$chr}->{$ID}{'score_GeneBody'}=$score;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
480 $genes{$chr}->{$ID}{'GeneBodyPeak_DistToTSS'} = $distTSS;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
481 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
482
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
483
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
484 my $scoreToadd = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
485
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
486 if (($firstPos>=$genes{$chr}->{$ID}{'left'})&&($LastPos<=$genes{$chr}->{$ID}{'right'})) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
487 $scoreToadd = $score/2.*($LastPos-$firstPos+1)/($genes{$chr}->{$ID}{'right'}-$genes{$chr}->{$ID}{'left'}+1);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
488 } elsif (($firstPos>=$genes{$chr}->{$ID}{'left'})&&($firstPos<$genes{$chr}->{$ID}{'right'})&&($LastPos>$genes{$chr}->{$ID}{'right'})) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
489 $scoreToadd = $score/2.*($genes{$chr}->{$ID}{'right'}-$firstPos+1)/($genes{$chr}->{$ID}{'right'}-$genes{$chr}->{$ID}{'left'}+1);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
490 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
491 elsif (($firstPos<$genes{$chr}->{$ID}{'left'})&&($LastPos>$genes{$chr}->{$ID}{'left'})&&($LastPos<=$genes{$chr}->{$ID}{'right'})) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
492 $scoreToadd = $score/2.*($LastPos-$genes{$chr}->{$ID}{'left'}+1)/($genes{$chr}->{$ID}{'right'}-$genes{$chr}->{$ID}{'left'}+1);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
493 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
494 elsif (($firstPos<$genes{$chr}->{$ID}{'left'})&&($LastPos>$genes{$chr}->{$ID}{'right'})) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
495 $scoreToadd = $score/2.;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
496 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
497 $genes{$chr}->{$ID}{'normalizedGBodyScore'} += $scoreToadd;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
498 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
499
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
500 if ( $LastPos>=$genes{$chr}->{$ID}{'enhStart'} && $firstPos <=$genes{$chr}->{$ID}{'enhEnd'} ) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
501 if ($genes{$chr}->{$ID}{'EnhScore'}<$score) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
502 $genes{$chr}->{$ID}{'EnhScore'}=$score;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
503 $genes{$chr}->{$ID}{'EnhDistTSS'} = $distTSS;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
504 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
505 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
506 if (($distTSS>= $enhRight)&&($distTSS<=$immediateDownstream)) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
507 if ($genes{$chr}->{$ID}{'largePromScore'}<$score) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
508 $genes{$chr}->{$ID}{'largePromScore'}=$score;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
509 $genes{$chr}->{$ID}{'promDist'} = $distTSS;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
510 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
511 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
512 if ( $LastPos>=$genes{$chr}->{$ID}{'IntraStart'} && $firstPos <= $genes{$chr}->{$ID}{'IntraEnd'} ) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
513 if ($genes{$chr}->{$ID}{'intraScore'}<$score) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
514 $genes{$chr}->{$ID}{'intraScore'}=$score;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
515 $genes{$chr}->{$ID}{'intraDistTSS'} = $distTSS;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
516 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
517 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
518
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
519
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
520 if (($distTSS>= 0)&&($distTSS<=$immediateDownstream)) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
521 if ($genes{$chr}->{$ID}{'immDownScore'}<$score) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
522 $genes{$chr}->{$ID}{'immDownScore'}=$score;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
523 $genes{$chr}->{$ID}{'immDownDist'} = $distTSS;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
524 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
525 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
526 if (($distTSS<= 0)&&($distTSS>=$enhRight)) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
527 if ($genes{$chr}->{$ID}{'promSimpleScore'}<$score) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
528 $genes{$chr}->{$ID}{'promSimpleScore'}=$score;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
529 $genes{$chr}->{$ID}{'promSimpleDist'} = $distTSS;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
530 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
531 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
532
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
533 if (($distTE>=0)&&($distTE<=$kb5)) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
534 if ($genes{$chr}->{$ID}{'geneDownstreamScore'}<$score) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
535 $genes{$chr}->{$ID}{'geneDownstreamScore'}=$score;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
536 $genes{$chr}->{$ID}{'geneDownstreamDist'} = $distTSS;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
537 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
538 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
539
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
540 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
541 $numberOfAllSites++;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
542 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
543
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
544 close FILE;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
545 print "\t$TF1Filename is read!\n" if ($verbose) ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
546 print "$numberOfAllSites sites\n" ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
547 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
548
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
549
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
550
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
551
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
552 open (OUT , ">$outname") or die "Cannot open file $outname!!!!: $!";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
553
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
554 print OUT "name\tchr\tstart\tend\tstrand\tReg\tfoldChange\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
555
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
556 if ($GCislands ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
557 print OUT "GC-island\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
558 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
559
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
560 if ( $fluoFile ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
561 print OUT "fluorescence\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
562 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
563
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
564 print OUT "GeneBodyNormalizedScore\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
565
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
566 if ($TF1Filename ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
567 print OUT "score_Gene\tdistTSS_Gene\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
568 print OUT "score_Promoter\tdistTSS_Promoter\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
569 print OUT "score_ImmDown\tdistTSS_ImmDown\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
570 print OUT "score_PromoterORImmDown\tdistTSS_PromoterORImmDown\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
571 print OUT "score_Enhancer\tdistTSS_Enhancer\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
572 print OUT "score_Intragenic\tdistTSS_Intragenic\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
573 print OUT "score_GeneDownstream\tdistTSS_GeneDownstream\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
574
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
575
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
576 print OUT "score_GeneBody\tGeneBodyPeak_DistToTSS"
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
577 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
578
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
579 print OUT "\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
580
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
581 for my $chr (keys %genes) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
582 for my $ID (keys %{$genes{$chr}}) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
583 print OUT "$genes{$chr}->{$ID}{'name'}\t$chr\t$genes{$chr}->{$ID}{'left'}\t$genes{$chr}->{$ID}{'right'}\t$genes{$chr}->{$ID}{'strand'}\t$genes{$chr}->{$ID}{'reg'}\t$genes{$chr}->{$ID}{'foldChange'}\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
584
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
585 if ($GCislands ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
586 print OUT "$genes{$chr}->{$ID}{'GCisland'}\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
587 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
588 if ( $fluoFile ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
589 print OUT "$genes{$chr}->{$ID}{'fluo'}\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
590 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
591
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
592 print OUT "$genes{$chr}->{$ID}{'normalizedGBodyScore'}\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
593
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
594 if ($TF1Filename ne "") {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
595 print OUT "$genes{$chr}->{$ID}{'allScore'}\t$genes{$chr}->{$ID}{'allDistTSS'}\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
596 print OUT "$genes{$chr}->{$ID}{'promSimpleScore'}\t$genes{$chr}->{$ID}{'promSimpleDist'}\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
597 print OUT "$genes{$chr}->{$ID}{'immDownScore'}\t$genes{$chr}->{$ID}{'immDownDist'}\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
598 print OUT "$genes{$chr}->{$ID}{'largePromScore'}\t$genes{$chr}->{$ID}{'promDist'}\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
599 print OUT "$genes{$chr}->{$ID}{'EnhScore'}\t$genes{$chr}->{$ID}{'EnhDistTSS'}\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
600 print OUT "$genes{$chr}->{$ID}{'intraScore'}\t$genes{$chr}->{$ID}{'intraDistTSS'}\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
601 print OUT "$genes{$chr}->{$ID}{'geneDownstreamScore'}\t$genes{$chr}->{$ID}{'geneDownstreamDist'}\t";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
602
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
603 print OUT "$genes{$chr}->{$ID}{'score_GeneBody'}\t$genes{$chr}->{$ID}{'GeneBodyPeak_DistToTSS'}"
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
604
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
605 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
606 print OUT "\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
607 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
608 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
609
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
610 close OUT;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
611
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
612 ###################################
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
613 sub med {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
614 my @arr = @_;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
615 my $med = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
616 @arr = sort {$a <=> $b} @arr;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
617 if ((scalar(@arr)/2) =~ m/[\.\,]5/) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
618 return $arr[floor(scalar(@arr)/2)];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
619 } else {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
620 return ($arr[scalar(@arr)/2]+$arr[scalar(@arr)/2-1])/2;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
621 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
622 $med;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
623 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
624
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
625 sub checkIfGC {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
626 my ($TSS,$strand,$dist,$GCislandsChr)=@_;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
627 my $ifGC = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
628 my $leftProm=$TSS-$dist;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
629 my $rightProm = $TSS;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
630 if ($strand== -1) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
631 my $leftProm=$TSS;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
632 my $rightProm = $TSS+$dist;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
633 } #print "$leftProm\t"; print "$rightProm\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
634 for my $leftGC (keys %{$GCislandsChr}) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
635 my $rightGC = $GCislandsChr->{$leftGC};
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
636 if ($leftGC>=$leftProm&&$leftGC<=$rightProm || $rightGC>=$leftProm&&$rightGC<=$rightProm) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
637 return "GC-island";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
638 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
639 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
640 return $ifGC ;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
641 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
642
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
643 sub getFirstIntron {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
644 my ($exonCount,$exonStarts,$exonEnds,$strand) = @_;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
645 my ($left,$right);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
646 if ($exonCount == 1) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
647 return (0,0);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
648 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
649 if ($strand == 1) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
650 $left = (split ",", $exonEnds)[0];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
651 $right = (split (",", $exonStarts))[1];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
652 } else {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
653 $left = (split (",", $exonEnds))[$exonCount-2];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
654 $right = (split (",", $exonStarts))[$exonCount-1];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
655 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
656 ($left,$right);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
657 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
658
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
659 sub getFirstExon {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
660 my ($exonCount,$exonStarts,$exonEnds,$strand) = @_;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
661 my ($left,$right);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
662 if ($exonCount == 1) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
663 return (0,0);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
664 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
665 if ($strand == 1) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
666 $left = (split ",", $exonStarts)[0];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
667 $right = (split (",", $exonEnds))[0]-$jonctionSize;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
668 } else {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
669 $left = (split (",", $exonStarts))[$exonCount-1]+$jonctionSize;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
670 $right = (split (",", $exonEnds))[$exonCount-1];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
671 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
672 ($left,$right);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
673 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
674
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
675
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
676 sub getIntronExon {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
677 my ($pos,$exonCount,$exonStarts,$exonEnds,$strand) = @_;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
678 my (@left,@right);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
679 @left = (split ",", $exonStarts);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
680 @right = (split (",", $exonEnds));
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
681
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
682 for (my $i = 0; $i<$exonCount;$i++) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
683 #print "$left[$i] <= $pos ? $pos <= $right[$i]\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
684 if (($left[$i]+$jonctionSize < $pos) && ($pos < $right[$i]-$jonctionSize)) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
685 #print "URA!\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
686 return "exon";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
687 } elsif (($i+1<$exonCount)&&($right[$i]+$jonctionSize < $pos) && ($pos < $left[$i+1]-$jonctionSize)) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
688 return "intron";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
689 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
690 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
691 return "jonction";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
692 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
693
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
694
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
695 sub getTypeIntra {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
696
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
697 my ($geneEntry, $pos) = @_;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
698 my $type;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
699
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
700 if (($pos >= $geneEntry->{'firstIntronStart'})&&($pos <=$geneEntry->{'firstIntronEnd'})) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
701 return "f_intron";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
702 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
703 if (($pos >= $geneEntry->{'firstExonStart'})&&($pos <=$geneEntry->{'firstExonEnd'})) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
704 return "f_exon";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
705 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
706 $type = getIntronExon ($pos, $geneEntry->{'exonCount'},$geneEntry->{'exonStarts'},$geneEntry->{'exonEnds'},$geneEntry->{'strand'});
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
707 return $type;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
708 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
709
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
710 sub getRNAlength {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
711 my ($exonStarts,$exonEnds) = @_;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
712 my (@left,@right);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
713 @left = (split ",", $exonStarts);
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
714 @right = (split (",", $exonEnds));
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
715 my $length = 0;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
716 for (my $i = 0; $i<scalar(@right);$i++) {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
717 $length+=$right[$i]-$left[$i];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
718 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
719 #print STDERR "length = $length\n";
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
720 return $length;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
721 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
722
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
723 sub min {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
724 my @a = sort {$a <=> $b} @_;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
725 $a[0];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
726 }
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
727
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
728 sub max {
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
729 my @a = sort {$b <=> $a} @_;
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
730 $a[0];
355bc8ae2487 Uploaded
jbrayet
parents:
diff changeset
731 }