comparison createControlPeakSubSet.pl @ 1:75cb9dfa2a43 draft

Uploaded
author jbrayet
date Mon, 04 Jan 2016 11:38:35 -0500
parents
children
comparison
equal deleted inserted replaced
0:21c035ec4d66 1:75cb9dfa2a43
1 #!/usr/bin/perl
2
3 #create a control dataset with the same number of reads as in the SAMPLE (highest peaks)
4
5 use strict;
6 use warnings;
7 use diagnostics;
8
9 my $usage = qq{
10 $0
11
12 -----------------------------
13 mandatory parameters:
14
15 -f CHiP_file
16 -c control_file
17 -o output file
18 -----------------------------
19 optional parameters:
20
21 -n number of files to create
22
23 none
24 };
25
26 if(scalar(@ARGV) == 0){
27 print $usage;
28 exit(0);
29 }
30
31 ## mandatory arguments
32
33 my $filename = "";
34 my $output_fname = "";
35
36 my $controlFilename = "";
37
38 my $nBootstrap = 1;
39
40
41 ## optional arguments
42
43 ## parse command line arguments
44
45 while(scalar(@ARGV) > 0){
46 my $this_arg = shift @ARGV;
47 if ( $this_arg eq '-h') {print "$usage\n"; exit; }
48
49 elsif ( $this_arg eq '-f') {$filename = shift @ARGV;}
50 elsif ( $this_arg eq '-c') {$controlFilename = shift @ARGV;}
51 elsif ( $this_arg eq '-o') {$output_fname = shift @ARGV;}
52 elsif ( $this_arg eq '-n') {$nBootstrap = shift @ARGV;}
53
54 elsif ( $this_arg =~ m/^-/ ) { print "unknown flag: $this_arg\n";}
55 }
56
57 if ( $filename eq ""){
58 die "you should specify chip file\n";
59 }
60 if( $controlFilename eq ""){
61 die "you should specify control file\n";
62 }
63
64 if( $output_fname eq ""){
65 die "you should specify output filename\n";
66 }
67
68
69 print "\n-----------------\n\n";
70
71 my %hash;
72 my $chipCount = 0;
73 my @header;
74 open FILE, "< $filename " || die "$filename : $!\n";
75 while(<FILE>){
76 $chipCount++;
77 }
78 close FILE;
79 #print "ChIP: $chipCount\n";
80
81
82 my $controlCount = 0;
83
84 open FILE, "< $controlFilename " || die "$controlFilename : $!\n";
85 while(<FILE>){
86 next if (/track/);
87 my $entry = $_;
88 my @fields = split(/\t/,$_);
89 $hash{$entry} = $fields[4];
90 $controlCount++;
91 }
92 #print "controlCount : $controlCount\n";
93
94 close FILE;
95 open OUT, "> $output_fname" || die "$output_fname: $!\n";
96 my $count = 0;
97 if ($controlCount>$chipCount) {
98 my $prob = $chipCount/$controlCount*1.1;
99 for my $entry (sort {$hash{$b}<=>$hash{$a}} keys %hash) {
100 my $yes = rand();
101 if ($yes<=$prob) {$yes=1;}else {$yes=0;}
102 if ($yes) {
103 print OUT $entry ;
104 $count++;
105 }
106 if ($count >=$chipCount) {
107 last;
108 }
109 }
110
111 } else {
112 for my $entry (keys %hash) {
113 print OUT $entry;
114 }
115 }
116
117 close OUT;
118
119
120 for my $try (2..$nBootstrap) {
121
122 open OUT, "> $output_fname$try" || die "$output_fname$try: $!\n";
123 my $count = 0;
124 if ($controlCount>$chipCount) {
125 my $prob = $chipCount/$controlCount*1.1;
126 for my $entry (sort {$hash{$b}<=>$hash{$a}} keys %hash) {
127 my $yes = rand();
128 if ($yes<=$prob) {$yes=1;}else {$yes=0;}
129 if ($yes) {
130 print OUT $entry ;
131 $count++;
132 }
133
134 if ($count >=$chipCount ) {
135 last;
136 }
137 }
138 if ($count <$chipCount) { #do it again!
139
140 for my $entry (sort {$hash{$b}<=>$hash{$a}} keys %hash) {
141 my $yes = rand();
142 if ($yes<=$prob) {$yes=1;}else {$yes=0;}
143 if ($yes) {
144 print OUT $entry ;
145 $count++;
146 }
147
148 if ($count >=$chipCount ) {
149 last;
150 }
151 }
152
153 }
154
155 } else {
156 for my $entry (keys %hash) {
157 print OUT $entry;
158 }
159 }
160
161 close OUT;
162
163 }