annotate createControlPeakSubSet.pl @ 2:cfede5ba1e76 draft

Uploaded
author jbrayet
date Tue, 05 Jan 2016 08:06:05 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
1 #!/usr/bin/perl
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
2
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
3 #create a control dataset with the same number of reads as in the SAMPLE (highest peaks)
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
4
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
5 use strict;
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
6 use warnings;
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
7 use diagnostics;
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
8
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
# Help text printed when the script is run without arguments or with -h.
# $0 interpolates to the name the script was invoked as.
my $usage = qq{
$0

-----------------------------
mandatory parameters:

-f CHiP_file
-c control_file
-o output file
-----------------------------
optional parameters:

-n number of files to create (default: 1)
};

# No arguments at all: show the help text and stop successfully.
if (scalar(@ARGV) == 0) {
    print $usage;
    exit(0);
}

## mandatory arguments

my $filename        = "";    # -f : ChIP file
my $controlFilename = "";    # -c : control file
my $output_fname    = "";    # -o : output file name

## optional arguments

my $nBootstrap = 1;          # -n : number of files to create

## parse command line arguments

# Every flag that consumes the following argument, mapped to the variable
# that receives it.
my %target_for = (
    '-f' => \$filename,
    '-c' => \$controlFilename,
    '-o' => \$output_fname,
    '-n' => \$nBootstrap,
);

while (scalar(@ARGV) > 0) {
    my $this_arg = shift @ARGV;

    if ($this_arg eq '-h') {
        print "$usage\n";
        exit;
    }
    elsif (exists $target_for{$this_arg}) {
        # A flag given as the very last argument has no value to consume;
        # stop here instead of storing undef in the option variable.
        if (scalar(@ARGV) == 0) {
            die "option $this_arg requires a value\n";
        }
        ${ $target_for{$this_arg} } = shift @ARGV;
    }
    elsif ($this_arg =~ m/^-/) {
        # Unknown flags are reported but do not abort the run.
        print "unknown flag: $this_arg\n";
    }
    # Anything else (a stray positional argument) is ignored.
}

if ($filename eq "") {
    die "you should specify chip file\n";
}
if ($controlFilename eq "") {
    die "you should specify control file\n";
}
if ($output_fname eq "") {
    die "you should specify output filename\n";
}

# $nBootstrap is later used as the upper bound of a numeric range, so it
# must be a plain non-negative integer.
if ($nBootstrap !~ m/\A\d+\z/) {
    die "-n expects a non-negative integer (got '$nBootstrap')\n";
}

print "\n-----------------\n\n";
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
70
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
# %hash maps each control line (verbatim, including its line ending) to the
# value of its fifth tab-separated field, which is used later as the sort key
# (presumably the peak score -- confirm against the input format).
my %hash;
my $chipCount = 0;
my @header;    # not used anywhere in the visible part of the script

# Count the lines of the ChIP file. Every line is counted.
# NOTE(review): lines containing "track" are skipped for the control file
# below but are counted here -- confirm that this asymmetry is intended.
#
# "or die" is required: with "||" the test binds to the file-name argument
# instead of to open(), so a failed open would go unnoticed.
open my $chip_fh, '<', $filename or die "$filename : $!\n";
while (my $chip_line = <$chip_fh>) {
    $chipCount++;
}
close $chip_fh;
#print "ChIP: $chipCount\n";


my $controlCount = 0;

# Load the control file, skipping every line that contains "track".
# Identical lines share one key in %hash but are each counted in
# $controlCount, so $controlCount can exceed the number of keys.
open my $control_fh, '<', $controlFilename or die "$controlFilename : $!\n";
while (my $entry = <$control_fh>) {
    next if $entry =~ /track/;
    my @fields = split(/\t/, $entry);
    # A line with fewer than five fields gets 0 as its sort key; an undefined
    # value would compare as 0 anyway, but with a warning per comparison.
    $hash{$entry} = defined $fields[4] ? $fields[4] : 0;
    $controlCount++;
}
#print "controlCount : $controlCount\n";

close $control_fh;
cfede5ba1e76 Uploaded
jbrayet
parents:
diff changeset
# Control entries ordered from the highest to the lowest value stored in
# %hash. The order is only needed when the control set has to be sub-sampled,
# and it is the same for every output file, so it is computed once.
my @ranked_entries = ();
if ($controlCount > $chipCount) {
    @ranked_entries = sort { $hash{$b} <=> $hash{$a} } keys %hash;
}

# Primary output file: a single sampling pass.
# NOTE(review): unlike the bootstrap files below, the primary file gets no
# second pass when the first one selects fewer than $chipCount entries --
# confirm whether that difference is intended.
write_control_subset($output_fname, 0, \@ranked_entries, \%hash,
                     $chipCount, $controlCount);

# Additional files are named "<output><n>" for n = 2 .. $nBootstrap.
for my $try (2 .. $nBootstrap) {
    write_control_subset("$output_fname$try", 1, \@ranked_entries, \%hash,
                         $chipCount, $controlCount);
}

# Walk the ranked entries once, writing each one with probability $prob,
# and stop as soon as $target entries have been written in total.
#   $out     - open output file handle
#   $ranked  - array ref of entries, highest value first
#   $prob    - selection probability for each entry
#   $target  - total number of entries wanted
#   $written - number of entries already written before this pass
# Returns the updated number of entries written.
sub sample_pass {
    my ($out, $ranked, $prob, $target, $written) = @_;

    for my $entry (@{$ranked}) {
        if (rand() <= $prob) {
            print {$out} $entry;
            $written++;
        }
        # Checked after every entry, selected or not.
        last if $written >= $target;
    }
    return $written;
}

# Create one output file.
#   $path       - file to create (overwritten if it exists)
#   $retry_once - true: run a second sampling pass if the first pass wrote
#                 fewer than $target entries; the second pass may write an
#                 entry that the first pass already wrote
#   $ranked     - array ref of control entries, highest value first
#   $score_of   - hash ref whose keys are the control entries
#   $target     - number of lines counted in the ChIP file
#   $available  - number of control lines counted
# When the control set is larger than the target, entries are sampled from
# the ranked list; otherwise every control entry is written.
# Dies if the file cannot be opened or closed.
sub write_control_subset {
    my ($path, $retry_once, $ranked, $score_of, $target, $available) = @_;

    # "or die" is required: with "||" the test binds to the file-name
    # argument instead of to open(), so a failed open would go unnoticed.
    open my $out, '>', $path or die "$path: $!\n";

    if ($available > $target) {
        # Selection probability, inflated by 10% over the plain ratio.
        my $prob    = $target / $available * 1.1;
        my $written = sample_pass($out, $ranked, $prob, $target, 0);
        if ($retry_once && $written < $target) {    # do it again!
            sample_pass($out, $ranked, $prob, $target, $written);
        }
    } else {
        # Control set is not larger than the ChIP set: keep every entry.
        for my $entry (keys %{$score_of}) {
            print {$out} $entry;
        }
    }

    # Buffered write errors only surface when the handle is closed.
    close $out or die "$path: $!\n";
    return;
}