changeset 4:82b10f23f1fe draft

Uploaded
author jbrayet
date Thu, 13 Aug 2015 11:02:47 -0400
parents db5076a0494f
children 1ec8c1d9c3c0
files annotatePeaks.pl
diffstat 1 files changed, 926 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/annotatePeaks.pl	Thu Aug 13 11:02:47 2015 -0400
@@ -0,0 +1,926 @@
+#:t:::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#:t::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#:::::::::::::z;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#::::::::::::i@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@$@@@@
+#:::::::::::3@@@@@@@@@@@@@@@@@@@@@@@@@B@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#::::::::::3@@@@@@@@@@@@@@@@@@@@@BEEESSE5EEEEBBM@@@@@@@@@@@@@@@@@@@@@@@@@@
+#::::::::::3@@@@@@@@@@@@@@@@@@@@BEEEEEE35EE55E2355E5SBMB@@@@@@@@@@@@@@@@@$
+#::::::::::@@@@@@@@@@@@@@@@@@@EEEE55533t3tttt::::::!!!!7755E755SBBMMM@@@MM
+#::::::::::3@@@@@@@@@@@@@@@@@@EEEE2t3ttttt:::::::::::::::::::::::!7?5225EE
+#::::::::::3@@@@@@@@@@@@@@@@@@EEEEE31t::::::::::::::::::::::::::::::::3E5@
+#::::::::::3@@@@@@@@@@@@@@@@@@EEEEEEtt:::::::::::::::::::::::::::::::::353
+#::::::::::3@@@@@@@@@@@@@@@@@@EEEEEE1ttz::::::::::::::::::::::::::::::::35
+#:::::::::::@@@@@@@@@@@@@@@@@@EEEEEEEtz1::::::::::::::::::::::::::::::::t:
+#:::::::::!3@@@@@@@@@@@@@@@@@@@EEEEEttt::::::::::::::::::::::::::::::::;zz
+#::::::::::@@@@@@@@@@@@@@@@@@@@EEEEEttt:::::z;z:::::::::::::::::::::::::13
+#::::::::::3B@@@@@@@@@@@@@@@@@@EEEEEEE3tt:czzztti;:::::::::::::::::::::::3
+#::::ttt::::3@@@@@@@@@@@@@@@@EEEEE5EE25Ezt1EEEz5Etzzz;;;;:::::::::::::::::
+#:::::::::::I9@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEE@@@@@@@@@@@@@@Ez;:::::::::::
+#:::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@Ez::::::
+#::::::::::::::E@@@@@@@@@@@@@@@@@@@@@@@@@@@@@BE5EBB@@@@@@@@@@@@@@@EEE:::::
+#:::::::::::::::@@@@@@@@@@@@@@@@@@@@@@@@@@@@E1::35@@@@@@@@@@ME3MMME2::::::
+#:::::::::::::::?@@@@@@@@@@@@@@@@@@M@@@@@@@EE:::::3SB@@BBESEEt::::::::::::
+#::::::::::::::::J$@@@@@@@B@@@@@@@@@@@@@@@@EE:::::::!35E33t:::::::::::::::
+#:::::::::::::::::3@E@@@EE5EESE5EESE@@@@@@@Et::::::::::::tz:::::::::::::::
+#:::::::::::::::::J@E$@EEE5133555SE@@@@@@@@Et:::::::::::::::::::::::::::::
+#::::::::::::::::::E@E@EEEEtt3523EEE@@@@@@@E::::::::::::::::::::::::::::::
+#:t::::::::::::::::JEE3@@@EEEEEEEEEE@@@@@@@E:::::::::t;:::::::::::::::::::
+#:t:::::::::::::::::!5ES@EEEEEEEEES@@@@@@@@@E;:::;;;:3Ez::::::::::::::::::
+#:t::::::::::::::::::::JE@@EEEEEEE@@@@@@@@@@@@@@@@ME!:::;:::::::::::::::::
+#:tz::::::::::::::::::::JE@@@EEEE@@@@@@@@@@@@@@EE!:::::::t::::::::::::::::
+#:t::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@ESBE::::::::::::::::::::::::::
+#:::::::::::::::::::::::::Q@@@@@@@@@@@@@@@@EE3EE;:::::zzzz::::::::::::::::
+#:::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@@@@NN@@@@@@Ez:::::::::::::::
+#:zt:::::::::::::::::::::::3@@@@EE@@@@@@@@@@EEEEt::;z113E5t:::::::::::::::
+#::tt:::::::::::::::::::::::3@@@E@@@@@@@@@@@@@@@@BEt::::::::::::::::t:::::
+#:tt:t:::::::::::::::::::::::?S@@@@@@@@@@@BBEEE51!::::::::::::::zzzEt:::::
+#::::::::::::::::::::::::::::::3Q@@@@@@@BEEEEEt:::::::::::::;zz@@@EE::::::
+#::::::::::::::::::::::::::::::::75B@@@@@EEEtt;:::::::::;zz@@@@BEEEtz:::::
+#::::::::::::::::::::::::::::::::::::?9@@@@@@@@@@@E2Ezg@@@@@B@@@EEEE1t::::
+#:::::::::::::::::::::::::::::::::::::::3@@@@@@@@@@@@@@@@@@@E@EEEEEEEzzz::
+#::::::::::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@EEEEEEE5ttttt
+#:::::::::::::::::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@EEEEEEEEEEEtzt
+#::::::::::::::::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@E@@EEEEEEEEEEEE@@@
+#::::::::::::::::::::::::::g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE3EEEE@@@@@@@
+#:::::::::::::::::::::;;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEt33@@@@@@@@@@
+#:::::::::::::::::;g@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@E@@@@@@EEEtg@@@@@@@@@@@@
+#::::::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@EEEE@@@@@@@@@@@@@@@@@@@@@@@@
+#:::::::::::::@@@@@@@@@@@@@@@@@$@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#::::::::::;@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+#
+# Copyleft ↄ⃝ 2012 Institut Curie
+# Author(s): Valentina Boeva, Alban Lermine (Institut Curie) 2012
+# Contact: valentina.boeva@curie.fr, alban.lermine@curie.fr
+# This software is distributed under the terms of the GNU General
+# Public License, either Version 2, June 1991 or Version 3, June 2007.
+
+#!/usr/bin/perl -w
+
+#for a complete list of genes and a list of sites, sorts genes close to sites
+#printDistance
+
+#read only gene files with refSeqGenes
+#counts only once overlapping transcripts or genes
+
+#calculates some stats about locations of peaks
+
+#uses hierarchie : promoter, imUpstream, intragenic,enh, 5kbdownstream, intergenic + for genes: fExon,exon,fIntron,intron,junction+-50kb ONLY FOR "findClosestGene"
+#outputs fold change of expression is available
+
+#outputname - corrected
+#all isoforms are considered, even those that start and end at the same coordinates
+#uses fluorescence values
+
+#only one location per gene in mode "all" (but promoter+intragenic if both)
+
+#read directly Noresp/down/up genes
+
+#prints distance to TE
+
+use strict;
+use POSIX;
+
+my $SitesFilename ;
+my $GenesFilename ;
+my $SelectedGenesFilename = "";
+my $MirFilename = "";
+my $minScore = 0;
+my $BIGNUMBER = 10000000000;	
+my $enhLeft = -30000;
+my $enhRight = -1500;
+my $promLeft = $enhRight ;
+my $immediateDownstream = 2000;
+my $maxScore = $BIGNUMBER ;
+my $verbose = 0;
+my $maxDistance = $BIGNUMBER ;
+my $downStreamDist = 5000;
+my $jonctionSize = 50;
+my $all = 0;
+my $fluoFile = "";
+my $regType = "";
+my $GCislands = "";
+my $ResFilename = "";
+
+my $usage = qq{
+    $0
+
+    -----------------------------
+    mandatory parameters:
+    
+    -g   filename 	file with all genes   
+    -tf filename 	file with sites of TF 
+    
+    -----------------------------
+    optional parameters:
+    -v				for verbose
+    -mir 	filename	file with positions of miRNA
+    -maxDist	value		maximal Distance to genes (def. Inf)
+    -minScore	value		minimal Score (def. 0)
+    -maxScore	value		maximal Score (def. Inf)  
+    -selG	filename	file with selected genes and their expression levels
+    -all 			to output all genes intersecting with the peak
+    -fluo	filename	file with fluorescence
+    -regType	valeu		down-regulated, up-regulated, no-response
+    -gc         filename        file with gc-islands
+
+    -lp         valeu        	upstream of TSS region to define promoter
+    -rp         valeu        	downstream of TSS region to define immediate downstream
+    -enh         valeu        	upstream of TSS region to define enhancer
+    -dg         valeu        	downstream of transcription end region to define gene downstream
+
+    -o		filename	output filename
+
+};
+
+if(scalar(@ARGV) == 0){
+    print $usage;
+    exit(0);
+}
+
+
+while(scalar(@ARGV) > 0){
+    my $this_arg = shift @ARGV;
+    if ( $this_arg eq '-h') {print "$usage\n"; exit; }
+
+    elsif ( $this_arg eq '-g') {$GenesFilename = shift @ARGV;}
+    elsif ( $this_arg eq '-tf') {$SitesFilename = shift @ARGV;}
+    elsif ( $this_arg eq '-mir') {$MirFilename = shift @ARGV;}   
+    elsif ( $this_arg eq '-selG') {$SelectedGenesFilename = shift @ARGV;}  
+    elsif ( $this_arg eq '-maxDist') {$maxDistance = 1;}
+    elsif ( $this_arg eq '-maxScore') {$maxScore = shift @ARGV;}   
+    elsif ( $this_arg eq '-minScore') {$minScore = shift @ARGV;}  
+    elsif ( $this_arg eq '-v') {$verbose = 1;}  
+    elsif ( $this_arg eq '-all') {$all = 1;}
+    elsif ( $this_arg eq '-regType') {$regType = shift @ARGV;}
+
+    elsif ( $this_arg eq '-fluo') {$fluoFile = shift @ARGV;}
+    elsif ( $this_arg eq '-gc') {$GCislands = shift @ARGV;}  
+    elsif ( $this_arg eq '-o') {$ResFilename = shift @ARGV;}     
+
+    elsif ( $this_arg eq '-lp') {$enhRight = shift @ARGV;$promLeft = $enhRight;}
+    elsif ( $this_arg eq '-rp') {$immediateDownstream = shift @ARGV;}  
+    elsif ( $this_arg eq '-enh') {$enhLeft = shift @ARGV;}    
+    elsif ( $this_arg eq '-dg') {$downStreamDist = shift @ARGV;}      
+
+    elsif ( $this_arg =~ m/^-/ ) { print "unknown flag: $this_arg\n";}
+}
+
+if ($maxDistance<0) {
+	die "\tThe maximal distance must be positive!\n";
+}
+my $tmpn = $SitesFilename;
+$tmpn =~ s/.*[\/\\]//;
+if ($ResFilename eq "") {
+	$ResFilename = $SelectedGenesFilename.$tmpn."_".$minScore."_dists.txt";
+
+	if ($maxScore != $BIGNUMBER) {	
+		$ResFilename = $SelectedGenesFilename.$SitesFilename."_".$minScore."_$maxScore"."_dists.txt";
+	}
+
+	if ($all == 1) {
+	    $ResFilename =~ s/_dists/_all_dists/;
+	}
+}
+
+
+#-----------read selected genes----------------
+my %selectedGenes;
+my %selectedGenesFoldChange;
+if ( $SelectedGenesFilename ne "") {
+    open (FILE, "<$SelectedGenesFilename ") or die "Cannot open file $SelectedGenesFilename !!!!: $!";
+    while (<FILE>) {
+        chomp;
+        my @a = split/\t/;
+        $selectedGenes{$a[1]} = $a[3];
+	$selectedGenesFoldChange{$a[1]} = $a[2];
+	#print "gene:$a[1],reg:$selectedGenes{$a[1]},FC:$selectedGenesFoldChange{$a[1]}\n";
+    }
+    
+    close FILE;
+    print "\t\t$SelectedGenesFilename is read!\n" if ($verbose);
+}
+
+#-----------read genes with fluorescence---------
+my %fluoGenes;
+if ( $fluoFile ne "") {
+    open (FILE, "<$fluoFile ") or die "Cannot open file $fluoFile !!!!: $!";
+    my $gene = "";
+    my $med = 0;
+    my %h;
+    while (<FILE>) {
+        chomp;
+        my @a = split/\t/;     
+
+        next if (scalar(@a)<5);
+        next if ($a[0] eq "probesets");
+        next unless ($a[0] =~m/\S/);
+        next unless ($a[4] =~m/\S/);
+        if ($gene ne "" && $gene ne $a[2]) {
+        	#calcMed        	
+        	$med = med(keys %h);        	        	
+        	$fluoGenes{$gene} = $med;
+        	$med=0;
+        	delete @h{keys %h};
+        } else {
+        	#$h{$a[4]} = 1;
+        }
+        $gene = $a[2];
+        $h{$a[4]} = 1;        
+       	#print "keys ", scalar(keys %h),"\t",keys %h,"\n";
+    }
+    if ($gene ne "") {
+        $med = med(keys %h);
+        $fluoGenes{$gene} = $med;
+    }
+    close FILE;
+    print "\t\t$fluoFile is read!\n" if ($verbose);
+}
+#-----------read GC-islands----------------
+my %GCislands;
+if ($GCislands ne "") {
+    open (FILE, "<$GCislands ") or die "Cannot open file $GCislands !!!!: $!";
+    
+     while (<FILE>) {
+        chomp;
+        my @a = split/\t/;
+        #bin	chrom	chromStart	chromEnd	name	length	cpgNum	gcNum	perCpg	perGc	obsExp
+        #107	chr1	36568608	36569851	CpG: 128	1243	128	766	20.6	61.6	1.09
+        my $chr = $a[1];
+        my $start = $a[2];
+        my $end = $a[3];
+        $GCislands{$chr}->{$start}=$end;
+     }
+    close FILE;
+} elsif ($verbose) {
+    print "you did not specify a file with GC-islands\n";    
+}
+
+#-----------read genes----------------
+
+my %genes;
+
+my $count = 0;
+
+open (GENES, "<$GenesFilename") or die "Cannot open file $GenesFilename!!!!: $!";
+<GENES>;
+while (<GENES>) {
+	chomp;	
+	if (/(chr.*)\s([+-])\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\d+)\s(\S+)\s(\S+)\s\S+\s(\S+)/){
+		my $name = $10;
+		my $chr = $1;
+		my $strand = $2;
+		if ($strand eq '+') {
+			$strand = 1;
+		}
+		else {
+			$strand = -1;	
+		}
+		my $leftPos = $3;
+		my $rightPos = $4;
+		my $cdsStart= $5;
+		my $cdsEnd= $6;
+		my $exonCount= $7;
+		my $exonStarts= $8;
+		my $exonEnds= $9;
+		my $ID = "$name\t$chr:$leftPos-$rightPos;$exonStarts-$exonEnds";
+		my $foldChange = 1;
+		my $reg = "NA";
+		my $fluo = "NA";
+		if ( $SelectedGenesFilename ne "") {
+		        #print "$name\t";
+		        if (exists($selectedGenes{$name})) {
+		                $reg = $selectedGenes{$name};                        
+		                $foldChange = $selectedGenesFoldChange{$name};
+		        }
+		}
+		unless (exists($genes{$chr})) {
+			my %h;
+			$genes{$chr} = \%h;
+		}
+		unless (exists($genes{$chr}->{$ID})) {
+			my %h1;
+			$genes{$chr}->{$ID} = \%h1;	
+			$count++;
+		}
+		if ( $fluoFile ne "") {                
+		    if (exists($fluoGenes{$name})) {
+			$fluo = $fluoGenes{$name};
+		    }
+		}  
+		$genes{$chr}->{$ID}{'name'} = $name ;
+		$genes{$chr}->{$ID}{'left'} = $leftPos ;
+		$genes{$chr}->{$ID}{'right'} = $rightPos ;
+		$genes{$chr}->{$ID}{'cdsStart'} = $cdsStart;
+		$genes{$chr}->{$ID}{'cdsEnd'} = $cdsEnd;			
+		$genes{$chr}->{$ID}{'strand'} = $strand; 
+		$genes{$chr}->{$ID}{'exonCount'} = $exonCount;			
+		$genes{$chr}->{$ID}{'exonStarts'} = $exonStarts; 
+		$genes{$chr}->{$ID}{'exonEnds'} = $exonEnds; 			
+		$genes{$chr}->{$ID}{'TSS'} = ($strand == 1) ? $leftPos :$rightPos ;
+		$genes{$chr}->{$ID}{'TE'} = ($strand == -1) ? $leftPos :$rightPos ;		
+		$genes{$chr}->{$ID}{'reg'} = $reg;
+		$genes{$chr}->{$ID}{'foldChange'} = $foldChange;
+		$genes{$chr}->{$ID}{'length'} = abs ($leftPos-$rightPos); 
+		($genes{$chr}->{$ID}{'firstIntronStart'},$genes{$chr}->{$ID}{'firstIntronEnd'}) = getFirstIntron ($exonCount,$exonStarts,$exonEnds,$strand);
+		($genes{$chr}->{$ID}{'firstExonStart'},$genes{$chr}->{$ID}{'firstExonEnd'}) = getFirstExon ($exonCount,$exonStarts,$exonEnds,$strand);
+	
+		$genes{$chr}->{$ID}{'fluo'} = $fluo;
+		
+		$genes{$chr}->{$ID}{'GCisland'} = 0;
+		if ($GCislands ne "") {
+		    $genes{$chr}->{$ID}{'GCisland'} = checkIfGC ($genes{$chr}->{$ID}{'TSS'},$strand,2000,$GCislands{$chr});
+		}
+			
+	}
+}
+print "Total genes : $count\n";
+close GENES;
+print "\t\t$GenesFilename is read!\n" if ($verbose);
+#for my $gName (sort keys %{$genes{'chr18'}}) {
+
+ #    print "$gName\t$genes{'chr18'}->{$gName}{'TSS'}\n";
+#}
+
+#-----------read file with sites miRNA, store as genes-----
+
+if ( $MirFilename eq ""){
+    print "you did not specify file with miRNA\n" if ($verbose);;
+}
+else {
+
+
+	open (MIR, "<$MirFilename ") or die "Cannot open file $MirFilename !!!!: $!";
+	#chr1	20669090	20669163	mmu-mir-206	960	+
+	while (<MIR>) {
+		chomp;	
+		my ($name, $chr, $leftPos, $rightPos, $strand );
+		my $fluo = "NA";
+
+#1	.	miRNA	20669091	20669163	.	+	.	ACC="MI0000249"; ID="mmu-mir-206";
+		if (/([0-9XYM]+)\s.\smiRNA\s(\d+)\s(\d+)\s.\s([+-])\s.\sACC=.*ID=\"(.*)\"/) {
+			$name = $5;
+			$chr = $1;
+			$leftPos = $2;
+			$rightPos = $3;
+			$strand = $4;
+		}
+		elsif (/(.*)\s(\d+)\s(\d+)\s(.*)\s(.*)\s(.*)/){
+			$name = $4;
+			$chr = $1;
+			$leftPos = $2;
+			$rightPos = $3;
+			$strand = $6;
+		} else {
+			next;
+		}
+
+		unless ($chr =~ m/chr/) {
+			$chr = "chr".$chr;
+		}
+		my $ID = "$name\t$chr:$leftPos-$rightPos";
+
+		if ($strand eq '+') {
+			$strand = 1;
+		}
+		else {
+			$strand = -1;	
+		}
+
+		unless (exists($genes{$chr})) {
+			my %h;
+			$genes{$chr} = \%h;
+		}
+		unless (exists($genes{$chr}->{$ID})) {
+			my %h1;
+			$genes{$chr}->{$ID} = \%h1;	
+			$count++;
+		}
+		$genes{$chr}->{$ID}{'name'} = $name ;
+		$genes{$chr}->{$ID}{'left'} = $leftPos ;
+		$genes{$chr}->{$ID}{'right'} = $rightPos ;
+		$genes{$chr}->{$ID}{'cdsStart'} = $leftPos ;
+		$genes{$chr}->{$ID}{'cdsEnd'} = $rightPos ;			
+		$genes{$chr}->{$ID}{'strand'} = $strand;
+		$genes{$chr}->{$ID}{'length'} = abs ($leftPos-$rightPos); 
+		$genes{$chr}->{$ID}{'exonCount'} = 1;			
+		$genes{$chr}->{$ID}{'exonStarts'} = $leftPos ; 
+		$genes{$chr}->{$ID}{'exonEnds'} = $rightPos ; 			
+		$genes{$chr}->{$ID}{'TSS'} = ($strand == 1) ? $leftPos :$rightPos ;
+		$genes{$chr}->{$ID}{'TE'} = ($strand == -1) ? $leftPos :$rightPos ;		
+		$genes{$chr}->{$ID}{'reg'} = "miRNA";
+		$genes{$chr}->{$ID}{'foldChange'} = 1;	
+		($genes{$chr}->{$ID}{'firstIntronStart'},$genes{$chr}->{$ID}{'firstIntronEnd'}) = (0,0);
+		($genes{$chr}->{$ID}{'firstExonStart'},$genes{$chr}->{$ID}{'firstExonEnd'}) = (0,0);
+
+		$genes{$chr}->{$ID}{'fluo'} = $fluo;
+		$genes{$chr}->{$ID}{'GCisland'} = 0;
+
+
+		
+	}
+
+
+	close MIR;
+	print "\t\t$MirFilename is read!\n" if ($verbose) ;
+}
+#-----------read file with sites and find N closest genes-----
+my $numberOfAllSites = 0;
+
+open (FILE, "<$SitesFilename") or die "Cannot open file $SitesFilename!!!!: $!";
+open (OUT , ">$ResFilename") or die "Cannot open file $ResFilename!!!!: $!";
+print OUT "Chromosome\tStart\tEnd\tMax\tScore\tDistTSS\tType\tTypeIntra\tReg\tFoldChange\tDistTE\tGeneName\tGeneCoordinates\n" ;
+my $header = <FILE>; #read header
+my @a = split (/\t/,$header );
+my $correction = 0;
+if ($a[1] =~m/chr/) {
+	$correction=1;
+}
+
+unless ($header=~m/Chromosome/ || $header=~m/track/|| $header=~m/Start/i) {
+	close FILE;
+	open (FILE, "<$SitesFilename") or die "Cannot open file $SitesFilename!!!!: $!";
+}
+
+$count = 0;
+
+my %typehash;
+my %typehashIntra;
+
+$typehashIntra{"f_exon"} = 0;
+$typehashIntra{"f_intron"} = 0;
+$typehashIntra{"jonction"} = 0;
+$typehashIntra{"exon"} = 0;
+$typehashIntra{"intron"} = 0;
+
+
+while (<FILE>) {
+	chomp;		
+	$numberOfAllSites++;	
+	my @a = split /\t/;	
+	my $chro = $a[0+$correction]; 
+	my $fpos = $a[1+$correction];
+	my $lpos = $a[2+$correction];
+	my $maxpos = $a[3+$correction];
+	if ($maxpos =~ m/\D/) {
+		$maxpos = int(($lpos+$fpos)/2);
+	}
+	my $score = $a[4+$correction];
+	next if ($score < $minScore);
+	next if ($score >= $maxScore);
+	$score = $score +1 -1;
+    	#print  "$score" ;	
+
+	#my $site = "$chro:$fpos-$lpos\t$maxpos\t$score";
+	#print $site ;
+	#my $motifNumber = $a[3];
+	
+	#my $geneSet = &search_genes($N,$chro,($fpos+$lpos)/2,\%genes);
+	my @b;
+	unless ($all) {
+		@b = &findClosestGene($fpos,$lpos,$score,$chro,$maxpos,\%genes); #findAllClosestGene for all genes overlaping a peak
+	} else {
+		@b = &findAllClosestGeneOneLocation ($fpos,$lpos,$score,$chro,$maxpos,\%genes); #findAllClosestGene
+	}
+		#print "$distTSS\t$distTE\n" unless ($distTSS == $BIGNUMBER);	
+	for my $type (@b) {
+		$typehash{$type} = 0 unless (exists($typehash{$type}));
+		$typehash{$type}++;
+	}
+	$count ++;	
+
+}
+close FILE;
+close OUT ;
+
+print "Total Sites: $count\n";
+my $nEntries = 0;
+for my $type (sort keys %typehash) {
+	$nEntries += $typehash{$type};
+}
+print "Type\tCount\tFrequency\n";
+for my $type (sort keys %typehash) {
+	print $type,"\t",$typehash{$type},"\t",$typehash{$type}/$nEntries,"\n";
+}
+for my $type (sort keys %typehashIntra) {
+	print $type,"\t",$typehashIntra{$type},"\t",$typehashIntra{$type}/$nEntries,"\n";
+}
+ 
+ 
+#my @arr = @{$genes{'chr'}};
+
+sub findClosestGene {	
+	my ($fpos,$lpos,$score,$chro,$pos,$genes) = @_;
+	my ($distTSS,$distTE,$type,$reg,$geneName,$foldChange) = ($BIGNUMBER,$BIGNUMBER,"intergenic",0,0,0);
+	my $locDistTSS = 0;
+	my $locDistTE  = 0;
+	my %hashForOverlapingGenes;
+	my @array2return;
+	my $typeIntra = "NA";
+	for my $gName (keys %{$genes->{$chro}}) {
+		my $TSS = $genes->{$chro}{$gName}{'TSS'};	
+		my $strand = $genes->{$chro}{$gName}{'strand'};
+	    	my $TE = $genes->{$chro}{$gName}{'TE'};	
+	    	
+	    	$locDistTE = ($pos-$TE)*$strand;
+		$locDistTSS = ($pos-$TSS)*$strand;
+		#print "$pos-$genes->{$chro}{$gName}{'TSS'})*$genes->{$chro}{$gName}{'strand'} = $locDistTSS\n";
+		#my $locDistTE = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
+		
+		#print "$gName $maxDistance $pos-$genes->{$chro}{$gName}{'TSS'} $genes->{$chro}{$gName}{'strand'}\n";
+		#print "$locDistTSS $enhLeft $enhRight\n";
+				
+		if (($locDistTSS>= $enhLeft)&&($locDistTSS<$enhRight)) {
+		    $type = "enhancer";			
+		} elsif (($locDistTSS>= $enhRight)&&($locDistTSS<=0)) {
+		    $type = "promoter";
+		} elsif (($locDistTSS> 0)&&($locDistTSS<=$immediateDownstream)&&($locDistTE<=0)) {
+		    $type = "immediateDownstream";			
+		} elsif (($locDistTSS >= $immediateDownstream)&&($locDistTE<=0)) {
+		    $type = "intragenic"; 
+		} elsif (($locDistTE >= 0)&&($locDistTE<=$downStreamDist)) {
+			$type = "5kbDownstream";		
+		} elsif (abs($locDistTSS)<abs($distTSS)) {
+			$distTSS = $locDistTSS;
+			$distTE = $locDistTE;
+			($reg,$geneName,$foldChange) = ($genes->{$chro}{$gName}{'reg'},$gName,$genes->{$chro}{$gName}{'foldChange'});
+		}
+		
+		if (($type ne "intergenic")&&($type ne "NA")) {
+			unless (exists ($hashForOverlapingGenes{$type})) {
+				my %h;
+				$hashForOverlapingGenes{$type} = \%h;
+			}
+			$hashForOverlapingGenes{$type}->{$locDistTSS}=$gName;
+
+			#print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$genes->{$chro}{$gName}{'reg'}\t$gName\n" ;
+			$type = "NA";
+			#unless ($gName =~ m/$TSS/) {
+			 #   print "$gName\t$TSS\n";
+			#}
+		}
+		
+	}
+	if ($type eq "intergenic") {
+		print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$typeIntra\t$reg\t$foldChange\t$distTE\t$geneName\n" ;
+		#print  "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$reg\t$geneName\n" ;
+		push(@array2return,$type);
+
+	} else {
+		my $selectedType;
+		if (exists ($hashForOverlapingGenes{"promoter"})) {
+			$selectedType = "promoter";
+		} elsif (exists ($hashForOverlapingGenes{"immediateDownstream"})) {
+			$selectedType = "immediateDownstream";
+		}  elsif (exists ($hashForOverlapingGenes{"intragenic"})) {
+			$selectedType = "intragenic";	
+		}   elsif (exists ($hashForOverlapingGenes{"enhancer"})) {
+			$selectedType = "enhancer";			
+		} elsif (exists ($hashForOverlapingGenes{"5kbDownstream"})) {
+			$selectedType = "5kbDownstream";		
+		}
+		my $bestGene = printBest($hashForOverlapingGenes{$selectedType},$chro,$fpos,$lpos,$pos,$score,$genes,$selectedType); #print "$type\n";		
+		push(@array2return,$selectedType);		
+		my %otherGenes;
+		for my $type ("promoter","immediateDownstream","intragenic","enhancer","5kbDownstream") {  #"firstIntron","exon","intron",
+			for my $locDistTSS (sort {$a<=>$b} keys %{$hashForOverlapingGenes{$type}}) {
+				my $otherGene = $hashForOverlapingGenes{$type}->{$locDistTSS};
+				next if ($genes->{$chro}{$bestGene}{'name'} eq $genes->{$chro}{$otherGene}{'name'});
+				next if (exists ($otherGenes{$genes->{$chro}{$otherGene}{'name'}}));
+				if (overlapGenes($otherGene,$bestGene,$chro,$genes)) {
+				    
+					if (($type eq "intragenic")||($type eq "immediateDownstream")) {
+					    $typeIntra = &getTypeIntra($genes->{$chro}{$otherGene},$pos);
+					    $typehashIntra{$typeIntra}++;
+					}else {
+					    $typeIntra = "NA";					}				    
+					print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$otherGene}{'reg'}\t$genes->{$chro}{$otherGene}{'foldChange'}\t",($pos-$genes->{$chro}{$otherGene}{'TE'})*$genes->{$chro}{$otherGene}{'strand'},"\t$otherGene\n" ;
+					push(@array2return,$type);
+					
+					#print "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$genes->{$chro}{$otherGene}{'reg'}\t$otherGene\n" ;
+					#print $type,"\n";
+					$otherGenes{$genes->{$chro}{$otherGene}{'name'}} = 1;
+				}
+			}	
+		}
+		
+	}
+	return @array2return;
+}
+
+sub findAllClosestGeneOneLocation {	
+	my ($fpos,$lpos,$score,$chro,$pos,$genes) = @_;
+	my ($distTSS,$type,$reg,$geneName,$foldChange) = ($BIGNUMBER,"intergenic",0,0,0);
+	my $locDistTSS = 0;
+	my $locDistTE  = 0;
+	my %hashForOverlapingGenes;
+	my @array2return;
+	my $typeIntra = "NA";
+	for my $gName (keys %{$genes->{$chro}}) {
+		my $TSS = $genes->{$chro}{$gName}{'TSS'};	
+		my $strand = $genes->{$chro}{$gName}{'strand'};
+	    	my $TE = $genes->{$chro}{$gName}{'TE'};	
+	    	
+	    	$locDistTE = ($pos-$TE)*$strand;
+		$locDistTSS = ($pos-$TSS)*$strand;
+		#print "$pos-$genes->{$chro}{$gName}{'TSS'})*$genes->{$chro}{$gName}{'strand'} = $locDistTSS\n";
+		#my $locDistTE = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
+		
+		#print "$gName $maxDistance $pos-$genes->{$chro}{$gName}{'TSS'} $genes->{$chro}{$gName}{'strand'}\n";
+		#print "$locDistTSS $enhLeft $enhRight\n";
+				
+		if (($locDistTSS>= $enhLeft)&&($locDistTSS<$enhRight)) {
+		    $type = "enhancer";			
+		} elsif (($locDistTSS>= $enhRight)&&($locDistTSS<=0)) {
+		    $type = "promoter";
+		} elsif (($locDistTSS> 0)&&($locDistTSS<=$immediateDownstream)&&($locDistTE<=0)) {
+		    $type = "immediateDownstream";			
+		} elsif (($locDistTSS >= $immediateDownstream)&&($locDistTE<=0)) {
+		    $type = "intragenic"; 
+		} elsif (($locDistTE >= 0)&&($locDistTE<=$downStreamDist)) {
+			$type = "5kbDownstream";		
+		} elsif (abs($locDistTSS)<abs($distTSS)) {
+			$distTSS = $locDistTSS;
+			($reg,$geneName,$foldChange) = ($genes->{$chro}{$gName}{'reg'},$gName,$genes->{$chro}{$gName}{'foldChange'});
+		}
+		
+		if (($type ne "intergenic")&&($type ne "NA")) {
+			unless (exists ($hashForOverlapingGenes{$type})) {
+			    my %h;
+			    $hashForOverlapingGenes{$type} = \%h;			
+			    $hashForOverlapingGenes{$type}->{$gName}=$locDistTSS;
+			    
+			    if (($type eq "intragenic")||($type eq "immediateDownstream")) {
+				$typeIntra = &getTypeIntra($genes->{$chro}{$gName},$pos);
+				$typehashIntra{$typeIntra}++;
+			    }	
+			    print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$gName}{'reg'}\t$genes->{$chro}{$gName}{'fluo'}\t$genes->{$chro}{$gName}{'foldChange'}\t$genes{$chro}->{$gName}{'GCisland'}\t",($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'},"\t$gName\n" ; 
+			    push(@array2return,$type);	
+			}
+			$typeIntra = "NA";
+			$type = "NA";
+			#unless ($gName =~ m/$TSS/) {
+			 #   print "$gName\t$TSS\n";
+			#}
+		}
+		
+	}
+	if ($type eq "intergenic") {
+		print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$geneName}{'reg'}\t$genes->{$chro}{$geneName}{'fluo'}\t$genes->{$chro}{$geneName}{'foldChange'}\t$genes{$chro}->{$geneName}{'GCisland'}\t",($pos-$genes->{$chro}{$geneName}{'TE'})*$genes->{$chro}{$geneName}{'strand'},"\t$geneName\n" ; 
+
+		#print  "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$reg\t$geneName\n" ;
+		push(@array2return,$type);
+
+	} 					
+	return @array2return;
+}
+
+sub findAllClosestGene {	
+	my ($fpos,$lpos,$score,$chro,$pos,$genes) = @_;
+	my ($distTSS,$type,$reg,$geneName,$foldChange) = ($BIGNUMBER,"intergenic",0,0,0);
+	my $locDistTSS = 0;
+	my $locDistTE  = 0;
+	my %hashForOverlapingGenes;
+	my @array2return;
+	my $typeIntra = "NA";
+	for my $gName (keys %{$genes->{$chro}}) {
+		my $TSS = $genes->{$chro}{$gName}{'TSS'};	
+		my $strand = $genes->{$chro}{$gName}{'strand'};
+	    	my $TE = $genes->{$chro}{$gName}{'TE'};	
+	    	
+	    	$locDistTE = ($pos-$TE)*$strand;
+		$locDistTSS = ($pos-$TSS)*$strand;
+		#print "$pos-$genes->{$chro}{$gName}{'TSS'})*$genes->{$chro}{$gName}{'strand'} = $locDistTSS\n";
+		#my $locDistTE = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
+		
+		#print "$gName $maxDistance $pos-$genes->{$chro}{$gName}{'TSS'} $genes->{$chro}{$gName}{'strand'}\n";
+		#print "$locDistTSS $enhLeft $enhRight\n";
+				
+		if (($locDistTSS>= $enhLeft)&&($locDistTSS<$enhRight)) {
+		    $type = "enhancer";			
+		} elsif (($locDistTSS>= $enhRight)&&($locDistTSS<=0)) {
+		    $type = "promoter";
+		} elsif (($locDistTSS> 0)&&($locDistTSS<=$immediateDownstream)&&($locDistTE<=0)) {
+		    $type = "immediateDownstream";			
+		} elsif (($locDistTSS >= $immediateDownstream)&&($locDistTE<=0)) {
+		    $type = "intragenic"; 
+		} elsif (($locDistTE >= 0)&&($locDistTE<=$downStreamDist)) {
+			$type = "5kbDownstream";		
+		} elsif (abs($locDistTSS)<abs($distTSS)) {
+			$distTSS = $locDistTSS;
+			($reg,$geneName,$foldChange) = ($genes->{$chro}{$gName}{'reg'},$gName,$genes->{$chro}{$gName}{'foldChange'});
+		}
+		
+		if (($type ne "intergenic")&&($type ne "NA")) {
+			unless (exists ($hashForOverlapingGenes{$type})) {
+				my %h;
+				$hashForOverlapingGenes{$type} = \%h;
+			}
+			#$hashForOverlapingGenes{$type}->{$gName}=$locDistTSS;
+			
+			$typeIntra = "NA";		
+			if (($type eq "intragenic")||($type eq "immediateDownstream")) {
+				$typeIntra = &getTypeIntra($genes->{$chro}{$gName},$pos);
+				$typehashIntra{$typeIntra}++;
+			}	
+			print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$gName}{'reg'}\t$genes->{$chro}{$gName}{'fluo'}\t$genes->{$chro}{$gName}{'foldChange'}\t$genes{$chro}->{$gName}{'GCisland'}\t",($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'},"\t$gName\n" ; 
+			push(@array2return,$type);
+			
+			$type = "NA";
+			#unless ($gName =~ m/$TSS/) {
+			 #   print "$gName\t$TSS\n";
+			#}
+		}
+		
+	}
+	if ($type eq "intergenic") {
+		print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$geneName}{'reg'}\t$genes->{$chro}{$geneName}{'fluo'}\t$genes->{$chro}{$geneName}{'foldChange'}\t$genes{$chro}->{$geneName}{'GCisland'}\t",($pos-$genes->{$chro}{$geneName}{'TE'})*$genes->{$chro}{$geneName}{'strand'},"\t$geneName\n" ; 
+
+		#print  "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$reg\t$geneName\n" ;
+		push(@array2return,$type);
+
+	} 					
+	return @array2return;
+}
+
+sub getTypeIntra {
+    
+    my ($geneEntry, $pos) = @_;
+    my $type;
+    
+    if (($pos >= $geneEntry->{'firstIntronStart'})&&($pos <=$geneEntry->{'firstIntronEnd'})) {	   
+	    return "f_intron";
+    }
+    if (($pos >= $geneEntry->{'firstExonStart'})&&($pos <=$geneEntry->{'firstExonEnd'})) {	   
+	    return "f_exon";
+    }
+    $type = getIntronExon ($pos, $geneEntry->{'exonCount'},$geneEntry->{'exonStarts'},$geneEntry->{'exonEnds'},$geneEntry->{'strand'});
+    return $type;
+}
+
+sub overlapGenes {
+	my ($otherGene,$bestGene,$chro,$genes)= @_;
+	my $a1 = $genes->{$chro}{$otherGene}{'left'};
+	my $a2 = $genes->{$chro}{$bestGene}{'left'};
+	my $e1 = $genes->{$chro}{$otherGene}{'right'};
+	my $e2 = $genes->{$chro}{$bestGene}{'right'};
+	if (($a1 >= $a2)&&($a1 <= $e2)) {
+		return 1;
+	}
+	if (($a2 >= $a1)&&($a2 <= $e1)) {
+		return 1;
+	}
+	return 0;
+}
+
+sub printBest {
+	my ($hash,$chro,$fpos,$lpos,$pos,$score,$genes,$type) = @_;
+	for my $locDistTSS (sort {$a<=>$b} keys %{$hash}) {
+		my $gName = $hash->{$locDistTSS};
+		my $typeIntra = "NA";		
+		if (($type eq "intragenic")||($type eq "immediateDownstream")) {
+			$typeIntra = &getTypeIntra($genes->{$chro}{$gName},$pos);
+			$typehashIntra{$typeIntra}++;
+		}	
+		print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$typeIntra\t$genes->{$chro}{$gName}{'reg'}\t$genes->{$chro}{$gName}{'foldChange'}\t",($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'},"\t$gName\n" ;		
+		return $gName;
+	}
+}
+
+#sub findAllClosestGene {	
+#	my ($fpos,$lpos,$score,$chro,$pos,$genes) = @_;
+#	my ($distTSS,$type,$reg,$geneName,$foldChange) = ($BIGNUMBER,"intergenic",0,0,0);
+#	my $locDistTSS = 0;
+#	my $locDistTE = 0;
+#	my @array2return;
+#	for my $gName (keys %{$genes->{$chro}}) {
+#		my $TSS = $genes->{$chro}{$gName}{'TSS'};	
+#		my $strand = $genes->{$chro}{$gName}{'strand'};
+#	    	my $TE = $genes->{$chro}{$gName}{'TE'};	
+#		$locDistTSS = ($pos-$TSS)*$strand;
+#		$locDistTE = ($pos-$TE)*$strand;
+#		#print "$pos-$genes->{$chro}{$gName}{'TSS'})*$genes->{$chro}{$gName}{'strand'} = $locDistTSS\n";
+#		#my $locDistTE = ($pos-$genes->{$chro}{$gName}{'TE'})*$genes->{$chro}{$gName}{'strand'};
+#		
+#		#print "$gName $maxDistance $pos-$genes->{$chro}{$gName}{'TSS'} $genes->{$chro}{$gName}{'strand'}\n";
+#		#print "$locDistTSS $enhLeft $enhRight\n";
+#				
+#		if (($locDistTSS>= $enhLeft)&&($locDistTSS<$enhRight)) {
+#			$type = "enhancer";
+#		} elsif (($locDistTSS>= $enhRight)&&($locDistTSS<=0)) {
+#			$type = "promoter";
+#		} elsif (($locDistTSS> 0)&&($locDistTSS<=$immediateDownstream)) {
+#			$type = "immediateDownstream";			
+#		} elsif (($pos >= $genes{$chro}->{$gName}{'firstIntronStart'})&&($pos <= $genes{$chro}->{$gName}{'firstIntronEnd'})) {
+#			$type = "firstIntron";						
+#		} elsif (($locDistTSS> $immediateDownstream)&&($locDistTSS<=$genes{$chro}->{$gName}{'length'})) {
+#			#$type = "intragenic";
+#			$type = &getIntronExon ($pos, $genes{$chro}->{$gName}{'exonCount'},$genes{$chro}->{$gName}{'exonStarts'},$genes{$chro}->{$gName}{'exonEnds'},$genes{$chro}->{$gName}{'strand'});
+#		} elsif (($locDistTE >= 0)&&($locDistTE<=$downStreamDist)) {
+#			$type = "5kbDownstream";		
+#		} elsif (abs($locDistTSS)<abs($distTSS)) {
+#			$distTSS = $locDistTSS;
+#			($reg,$geneName,$foldChange) = ($genes->{$chro}{$gName}{'reg'},$gName,$genes->{$chro}{$gName}{'foldChange'});
+#		}
+#		if (($type ne "intergenic")&&($type ne "NA")) {
+#			print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$locDistTSS\t$type\t$genes->{$chro}{$gName}{'reg'}\t$genes->{$chro}{$gName}{'foldChange'}\t$gName\n" ;
+#			push(@array2return,$type);
+#			$type = "NA";
+#			#unless ($gName =~ m/$TSS/) {
+#			 #   print "$gName\t$TSS\n";
+#			#}
+#		}
+#		
+#	}
+#	if ($type eq "intergenic") {
+#		print OUT "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$reg\t$foldChange\t$geneName\n" ;
+#		push(@array2return,$type);
+#		#print  "$chro\t$fpos\t$lpos\t$pos\t$score\t$distTSS\t$type\t$reg\t$geneName\n" ;
+#
+#	}
+#	return @array2return;
+#}
+
+sub getFirstIntron {
+	my ($exonCount,$exonStarts,$exonEnds,$strand) = @_;
+	my ($left,$right);
+	if ($exonCount == 1) {
+		return (0,0);
+	}
+	if ($strand == 1) {
+		$left = (split ",", $exonEnds)[0]+$jonctionSize;
+		$right = (split (",", $exonStarts))[1]-$jonctionSize;
+	} else {
+		$left = (split (",", $exonEnds))[$exonCount-2]+$jonctionSize;
+		$right = (split (",", $exonStarts))[$exonCount-1]-$jonctionSize;	
+	}
+	($left,$right);
+}
+
+sub getFirstExon {
+	my ($exonCount,$exonStarts,$exonEnds,$strand) = @_;
+	my ($left,$right);
+	if ($exonCount == 1) {
+		return (0,0);
+	}
+	if ($strand == 1) {
+		$left = (split ",", $exonStarts)[0];
+		$right = (split (",", $exonEnds))[0]-$jonctionSize;
+	} else {
+		$left = (split (",", $exonStarts))[$exonCount-1]+$jonctionSize;
+		$right = (split (",", $exonEnds))[$exonCount-1];	
+	}
+	($left,$right);
+}
+
+sub getIntronExon {
+	my ($pos,$exonCount,$exonStarts,$exonEnds,$strand) = @_;
+	my (@left,@right);
+	@left = (split ",", $exonStarts);
+	@right = (split (",", $exonEnds));
+
+	for (my $i = 0; $i<$exonCount;$i++) {
+		#print "$left[$i] <= $pos ? $pos <= $right[$i]\n";
+		if (($left[$i]+$jonctionSize < $pos) && ($pos < $right[$i]-$jonctionSize)) {
+			#print "URA!\n";
+			return "exon";
+		} elsif (($i+1<$exonCount)&&($right[$i]+$jonctionSize < $pos) && ($pos < $left[$i+1]-$jonctionSize)) {
+			return "intron";
+		}
+	}
+	return "jonction";	
+}
+
+sub min {
+	return $_[0] if ($_[0]<$_[1]);
+	$_[1];
+}
+sub med {
+	my @arr = @_;
+	my $med = 0;
+	@arr = sort {$a <=> $b} @arr; 
+	if ((scalar(@arr)/2) =~ m/[\.\,]5/) {
+		return $arr[floor(scalar(@arr)/2)];
+	} else {
+		return ($arr[scalar(@arr)/2]+$arr[scalar(@arr)/2-1])/2;
+	}
+	$med;	
+}
+
+sub checkIfGC {
+    my ($TSS,$strand,$dist,$GCislandsChr)=@_;
+    my $ifGC = 0;
+    my $leftProm=$TSS-$dist;
+    my $rightProm = $TSS;
+    if ($strand== -1) {
+	my $leftProm=$TSS;
+	my $rightProm = $TSS+$dist;	
+    }
+    for my $leftGC (keys %{$GCislandsChr}) {
+	my $rightGC = $GCislandsChr->{$leftGC};
+	if ($leftGC>=$leftProm&&$leftGC<=$rightProm || $rightGC>=$leftProm&&$rightGC<=$rightProm) {
+	    return "GC-island";
+	}
+    }       
+    return $ifGC ;
+}