annotate extractCentralRegions.pl @ 2:421bebfbd807 draft default tip

Uploaded
author jbrayet
date Tue, 05 Jan 2016 09:14:05 -0500
parents eaf2e2b19e41
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
1 #!/usr/bin/perl -w
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
2 use strict;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
3
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
4 #creats a BED file with central area of peaks
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
5
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
6
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
7 my $usage = qq{
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
8 $0
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
9
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
10 -----------------------------
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
11 mandatory parameters:
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
12
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
13 -f filename file with sites in BED format
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
14 -l value length of the cental regions
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
15
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
16 -----------------------------
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
17 optional parameters:
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
18 -v for verbose
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
19 -head 1/0 if there is a header
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
20 -o filename output file
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
21 };
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
22
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
23 if(scalar(@ARGV) <2){
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
24 print $usage;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
25 exit(0);
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
26 }
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
27
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
28 my $flank = 0;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
29 my $ResFilename = "";
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
30 my $file = "";
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
31 my $header = 0;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
32 my $verbose = 0;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
33
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
34 while(scalar(@ARGV) > 0){
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
35 my $this_arg = shift @ARGV;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
36 if ( $this_arg eq '-h') {print "$usage\n"; exit; }
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
37 elsif ( $this_arg eq '-f') {$file = shift @ARGV;}
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
38 elsif ( $this_arg eq '-v') {$verbose = 1;}
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
39 elsif ( $this_arg eq '-head') {$header = shift @ARGV;}
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
40 elsif ( $this_arg eq '-o') {$ResFilename = shift @ARGV;}
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
41 elsif ( $this_arg eq '-l') {$flank = shift @ARGV;$flank /=2;}
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
42 elsif ( $this_arg =~ m/^-/ ) { print "unknown flag: $this_arg\n";}
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
43 }
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
44
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
45
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
46
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
47
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
48 my $count = 0;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
49 my %hash;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
50
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
51 open (FILE, "<$file") or die "Cannot open file $file!!!!: $!";
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
52 open (OUT, ">$ResFilename") or die "Cannot open file $ResFilename!!!!: $!";
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
53
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
54 if ($header) {
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
55 <FILE>;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
56 }
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
57
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
58 while (<FILE>) {
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
59 s/\R//g;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
60 next if (/^#/);
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
61 next if (/track/);
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
62 next if (/summit/);
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
63 my @a = split /\s/;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
64 my $chr = $a[0];
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
65 my $maxPos = $a[3];
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
66 my $score = $a[4];
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
67 if ($maxPos=~/\D/) {
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
68 $maxPos = int(($a[1]+$a[2])/2);
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
69 } elsif ($maxPos < $a[1]){ #MACS intervals
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
70 $maxPos = $a[1]+$a[4];
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
71 $score = $a[5];
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
72 }
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
73 my $firstPos = $maxPos-$flank;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
74 my $lastPos = $maxPos+$flank;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
75
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
76 my $ID=$chr.":".$firstPos."_".$lastPos."_".$score ;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
77 unless (exists($hash{$ID})) {
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
78 $hash{$ID}=1;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
79 $count++;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
80 print OUT "$chr\t$firstPos\t$lastPos\t$score\n";
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
81 }
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
82 }
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
83
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
84 #print "$file\t$count\n";
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
85 close FILE;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
86 close OUT;
eaf2e2b19e41 Uploaded
jbrayet
parents:
diff changeset
87