annotate createControlPeakSubSet.pl @ 1:75cb9dfa2a43 draft

Uploaded
author jbrayet
date Mon, 04 Jan 2016 11:38:35 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
75cb9dfa2a43 Uploaded
jbrayet
parents:
diff changeset
#!/usr/bin/perl

# Create a control dataset with the same number of reads as in the SAMPLE
# (highest peaks).
#
# Overview of what the script does:
#   1. Counts the lines of the ChIP file (-f).
#   2. Loads every line of the control file (-c) that does not contain
#      "track", remembering the fifth tab-separated field of each line as
#      its ranking value.
#   3. Writes the output file (-o).  If the control has more entries than the
#      ChIP file has lines, entries are visited from the highest ranking value
#      to the lowest and each one is kept with a fixed probability until as
#      many entries as ChIP lines have been written.  Otherwise all control
#      entries are written.
#   4. With -n N (N > 1), repeats step 3 for files named <output>2 .. <output>N.

use strict;
use warnings;
use diagnostics;

my $usage = qq{
$0

-----------------------------
mandatory parameters:

-f CHiP_file
-c control_file
-o output file
-----------------------------
optional parameters:

-n number of files to create

none
};

if (scalar(@ARGV) == 0) {
    print $usage;
    exit(0);
}

## mandatory arguments

my $filename        = "";
my $output_fname    = "";
my $controlFilename = "";

## optional arguments

my $nBootstrap = 1;

## parse command line arguments

while (scalar(@ARGV) > 0) {
    my $this_arg = shift @ARGV;
    if    ($this_arg eq '-h') { print "$usage\n"; exit; }
    elsif ($this_arg eq '-f') { $filename        = shift @ARGV; }
    elsif ($this_arg eq '-c') { $controlFilename = shift @ARGV; }
    elsif ($this_arg eq '-o') { $output_fname    = shift @ARGV; }
    elsif ($this_arg eq '-n') { $nBootstrap      = shift @ARGV; }
    elsif ($this_arg =~ m/^-/) { print "unknown flag: $this_arg\n"; }
}

# A flag given as the very last argument leaves its value undefined, so test
# for definedness as well as emptiness.
if (!defined $filename || $filename eq "") {
    die "you should specify chip file\n";
}
if (!defined $controlFilename || $controlFilename eq "") {
    die "you should specify control file\n";
}
if (!defined $output_fname || $output_fname eq "") {
    die "you should specify output filename\n";
}
if (!defined $nBootstrap || $nBootstrap !~ /\A\d+\z/) {
    die "number of files to create (-n) should be a non-negative integer\n";
}

print "\n-----------------\n\n";

## count the lines of the ChIP file

# NOTE(review): every line is counted here, including any "track" line,
# whereas "track" lines are skipped when reading the control - confirm that
# this asymmetry is intended.
my $chipCount = 0;
open(my $chip_fh, '<', $filename) or die "$filename : $!\n";
while (defined(my $line = <$chip_fh>)) {
    $chipCount++;
}
close $chip_fh;

## load the control entries

my %hash;              # full control line (newline included) => ranking value
my $controlCount = 0;  # lines read; may exceed the number of keys in %hash
                       # when the control file contains identical lines

open(my $control_fh, '<', $controlFilename) or die "$controlFilename : $!\n";
while (defined(my $entry = <$control_fh>)) {
    next if ($entry =~ /track/);
    my @fields = split(/\t/, $entry);

    # Fifth tab-separated field is the sort key (presumably the score column
    # of a BED-like file - TODO confirm).  A missing field is stored as 0,
    # which is also how an undefined value would compare numerically.
    $hash{$entry} = defined $fields[4] ? $fields[4] : 0;
    $controlCount++;
}
close $control_fh;

## write the output file(s)

if ($controlCount > $chipCount) {

    # $controlCount is > 0 here, so the division is safe.  The ratio is scaled
    # by 1.1 (presumably so that a single pass usually reaches the target).
    my $prob = $chipCount / $controlCount * 1.1;

    # The ranking does not change between files, so sort only once.
    my @ranked = sort { $hash{$b} <=> $hash{$a} } keys %hash;

    # NOTE(review): the first file gets one sampling pass while the numbered
    # files get a second pass when the first falls short; kept as in the
    # original - confirm whether the first file should retry too.
    write_sampled_entries($output_fname, \@ranked, $chipCount, $prob, 1);
    for my $try (2 .. $nBootstrap) {
        write_sampled_entries("$output_fname$try", \@ranked, $chipCount, $prob, 2);
    }
}
else {
    my @entries = keys %hash;
    write_all_entries($output_fname, \@entries);
    for my $try (2 .. $nBootstrap) {
        write_all_entries("$output_fname$try", \@entries);
    }
}

# Write a random subset of $entries to the file $path.
#
# Parameters:
#   $path       - file to create (truncated if it exists)
#   $entries    - array ref of lines, already in the order to visit them
#   $target     - stop as soon as this many lines have been written
#   $prob       - each visited line is written when rand() <= $prob
#   $max_passes - how many times the list may be walked; a later pass only
#                 runs if the earlier ones wrote fewer than $target lines,
#                 and it can write a line that was already written
#
# Dies if the file cannot be opened or closed.
sub write_sampled_entries {
    my ($path, $entries, $target, $prob, $max_passes) = @_;

    open(my $out_fh, '>', $path) or die "$path: $!\n";

    my $count = 0;
  PASS:
    for my $pass (1 .. $max_passes) {
        for my $entry (@{$entries}) {
            if (rand() <= $prob) {
                print {$out_fh} $entry;
                $count++;
            }
            last PASS if ($count >= $target);
        }
    }

    # Buffered write errors only surface at close time.
    close($out_fh) or die "$path: $!\n";
    return;
}

# Write every line of $entries (array ref) to the file $path, in the given
# order.  Dies if the file cannot be opened or closed.
sub write_all_entries {
    my ($path, $entries) = @_;

    open(my $out_fh, '>', $path) or die "$path: $!\n";
    for my $entry (@{$entries}) {
        print {$out_fh} $entry;
    }
    close($out_fh) or die "$path: $!\n";
    return;
}
75cb9dfa2a43 Uploaded
jbrayet
parents:
diff changeset
162
75cb9dfa2a43 Uploaded
jbrayet
parents:
diff changeset
163 }