#!/usr/bin/perl

# Convert a tab-separated peak list into a BED-style track file, keeping only
# the peaks whose height (5th column) reaches the -t threshold.
#
# Input columns, as they are used below:
#   1  chromosome name ("chr" is prepended when the name does not contain it)
#   2  value copied to the BED "name" column
#      NOTE(review): presumably the position of the peak maximum -- confirm
#   3  peak start
#   4  peak end
#   5  peak height (copied to the BED "score" column)
# Lines containing "max" or "track", and lines starting with "#", are skipped.
#
# When a chromosome lengths file is given with -g, columns 2-4 are clamped to
# the length of the peak's chromosome.

use strict;
use warnings;
use diagnostics;

my $usage = qq{
$0

-----------------------------
mandatory parameters:

-f peaks
-t min peak height
-o output file

-----------------------------
optional parameters:
-n name
-g chromosome lengths file
};

if (scalar(@ARGV) == 0) {
    print $usage;
    exit(0);
}

## mandatory arguments

my $filename     = "";
my $output_fname = "";
my $minPeakH     = 0;

## optional arguments

my $chromLengthsFile = "";
my $expName          = "User Track";

## parse command line arguments

while (scalar(@ARGV) > 0) {
    my $this_arg = shift @ARGV;
    if    ($this_arg eq '-h') { print "$usage\n"; exit; }
    elsif ($this_arg eq '-f') { $filename         = shift @ARGV; }
    elsif ($this_arg eq '-t') { $minPeakH         = shift @ARGV; }
    elsif ($this_arg eq '-o') { $output_fname     = shift @ARGV; }
    elsif ($this_arg eq '-g') { $chromLengthsFile = shift @ARGV; }
    elsif ($this_arg eq '-n') { $expName          = shift @ARGV; }
    elsif ($this_arg =~ m/^-/) { print "unknown flag: $this_arg\n"; }
}

if ($filename eq "") {
    die "you should specify chip file\n";
}
if ($output_fname eq "") {
    die "you should specify output filename\n";
}

# Lower the threshold by 0.5 unless the given value already contains "0.5".
# NOTE(review): presumably heights are whole numbers, so N-0.5 keeps the ">="
# test below from rejecting a peak of height exactly N -- confirm.
$minPeakH -= 0.5 unless ($minPeakH =~ m/0\.5/);

# read chromosome lengths if provided:
my %max;
if ($chromLengthsFile ne "") {
    # "or" (not "||") so that the die is attached to open() and not to the
    # file name, which is always true.
    open(my $len_fh, '<', $chromLengthsFile)
        or die "$chromLengthsFile : $!\n";
    while (my $line = <$len_fh>) {
        chomp $line;
        if ($line =~ /(chr\S+)\s(\d+)/) {
            $max{$1} = $2;
        }
    }
    close $len_fh;
}

######

print "\n-----------------\n\n";

open(my $in_fh, '<', $filename)
    or die "$filename : $!\n";
open(my $out_fh, '>', $output_fname)
    or die "$output_fname: $!\n";
print {$out_fh} "track name='$expName' description='$expName'\n";

my $count  = 0;    # peaks read
my $scount = 0;    # peaks that passed the height threshold

while (my $line = <$in_fh>) {
    chomp $line;
    next if ($line =~ /max/);
    next if ($line =~ /track/);
    next if ($line =~ /^\#/);

    my @fields = split(/\t/, $line);

    # A record needs at least the five columns used below; anything shorter
    # (e.g. a blank line) would otherwise be written out as an empty peak.
    if (scalar(@fields) < 5) {
        warn "skipping line $. of $filename: fewer than 5 tab-separated fields\n";
        next;
    }

    $count++;
    next unless ($fields[4] >= $minPeakH);

    $scount++;
    $fields[0] = "chr" . $fields[0] unless ($fields[0] =~ m/chr/);

    if ($chromLengthsFile ne "") {
        my $maxV = $max{ $fields[0] };

        # Clamp only when the chromosome is listed in the lengths file;
        # an unknown chromosome leaves the coordinates untouched.
        if (defined $maxV) {
            $fields[2] = min($fields[2], $maxV);
            $fields[3] = min($fields[3], $maxV);
            $fields[1] = min($fields[1], $maxV);
        }
    }

    # 12 BED columns: chrom, start, end, name, score, strand, thickStart,
    # thickEnd, itemRgb, blockCount, blockSizes, blockStarts.
    # The newline is appended after the join so no trailing tab is written.
    print {$out_fh} join("\t",
        $fields[0], $fields[2], $fields[3], $fields[1], $fields[4], "+",
        $fields[2], $fields[3], "255,120,11", "1",
        $fields[3] - $fields[2], 0), "\n";
}

close $in_fh;
# Buffered write errors surface at close time, so check it.
close $out_fh or die "$output_fname: $!\n";
print "read: $count peaks; selected: $scount\n";

# Return the smaller of two numbers.
#
# Arguments: two numeric values.
# Returns:   the first value when it is strictly less than the second,
#            otherwise the second (so the second is returned on a tie).
sub min {
    # Named $x/$y rather than $a/$b, which are Perl's special sort variables.
    my ($x, $y) = @_;
    return $x < $y ? $x : $y;
}