Mercurial > repos > jbrayet > annotategenes_1_0_docker
comparison createControlPeakSubSet.pl @ 1:75cb9dfa2a43 draft
Uploaded
author | jbrayet |
---|---|
date | Mon, 04 Jan 2016 11:38:35 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:21c035ec4d66 | 1:75cb9dfa2a43 |
---|---|
1 #!/usr/bin/perl | |
2 | |
3 #create a control dataset with the same number of reads as in the SAMPLE (highest peaks) | |
4 | |
5 use strict; | |
6 use warnings; | |
7 use diagnostics; | |
8 | |
9 my $usage = qq{ | |
10 $0 | |
11 | |
12 ----------------------------- | |
13 mandatory parameters: | |
14 | |
15 -f CHiP_file | |
16 -c control_file | |
17 -o output file | |
18 ----------------------------- | |
19 optional parameters: | |
20 | |
21 -n number of files to create | |
22 | |
23 none | |
24 }; | |
25 | |
26 if(scalar(@ARGV) == 0){ | |
27 print $usage; | |
28 exit(0); | |
29 } | |
30 | |
31 ## mandatory arguments | |
32 | |
33 my $filename = ""; | |
34 my $output_fname = ""; | |
35 | |
36 my $controlFilename = ""; | |
37 | |
38 my $nBootstrap = 1; | |
39 | |
40 | |
41 ## optional arguments | |
42 | |
43 ## parse command line arguments | |
44 | |
45 while(scalar(@ARGV) > 0){ | |
46 my $this_arg = shift @ARGV; | |
47 if ( $this_arg eq '-h') {print "$usage\n"; exit; } | |
48 | |
49 elsif ( $this_arg eq '-f') {$filename = shift @ARGV;} | |
50 elsif ( $this_arg eq '-c') {$controlFilename = shift @ARGV;} | |
51 elsif ( $this_arg eq '-o') {$output_fname = shift @ARGV;} | |
52 elsif ( $this_arg eq '-n') {$nBootstrap = shift @ARGV;} | |
53 | |
54 elsif ( $this_arg =~ m/^-/ ) { print "unknown flag: $this_arg\n";} | |
55 } | |
56 | |
57 if ( $filename eq ""){ | |
58 die "you should specify chip file\n"; | |
59 } | |
60 if( $controlFilename eq ""){ | |
61 die "you should specify control file\n"; | |
62 } | |
63 | |
64 if( $output_fname eq ""){ | |
65 die "you should specify output filename\n"; | |
66 } | |
67 | |
68 | |
69 print "\n-----------------\n\n"; | |
70 | |
71 my %hash; | |
72 my $chipCount = 0; | |
73 my @header; | |
74 open FILE, "< $filename " || die "$filename : $!\n"; | |
75 while(<FILE>){ | |
76 $chipCount++; | |
77 } | |
78 close FILE; | |
79 #print "ChIP: $chipCount\n"; | |
80 | |
81 | |
82 my $controlCount = 0; | |
83 | |
84 open FILE, "< $controlFilename " || die "$controlFilename : $!\n"; | |
85 while(<FILE>){ | |
86 next if (/track/); | |
87 my $entry = $_; | |
88 my @fields = split(/\t/,$_); | |
89 $hash{$entry} = $fields[4]; | |
90 $controlCount++; | |
91 } | |
92 #print "controlCount : $controlCount\n"; | |
93 | |
94 close FILE; | |
95 open OUT, "> $output_fname" || die "$output_fname: $!\n"; | |
96 my $count = 0; | |
97 if ($controlCount>$chipCount) { | |
98 my $prob = $chipCount/$controlCount*1.1; | |
99 for my $entry (sort {$hash{$b}<=>$hash{$a}} keys %hash) { | |
100 my $yes = rand(); | |
101 if ($yes<=$prob) {$yes=1;}else {$yes=0;} | |
102 if ($yes) { | |
103 print OUT $entry ; | |
104 $count++; | |
105 } | |
106 if ($count >=$chipCount) { | |
107 last; | |
108 } | |
109 } | |
110 | |
111 } else { | |
112 for my $entry (keys %hash) { | |
113 print OUT $entry; | |
114 } | |
115 } | |
116 | |
117 close OUT; | |
118 | |
119 | |
120 for my $try (2..$nBootstrap) { | |
121 | |
122 open OUT, "> $output_fname$try" || die "$output_fname$try: $!\n"; | |
123 my $count = 0; | |
124 if ($controlCount>$chipCount) { | |
125 my $prob = $chipCount/$controlCount*1.1; | |
126 for my $entry (sort {$hash{$b}<=>$hash{$a}} keys %hash) { | |
127 my $yes = rand(); | |
128 if ($yes<=$prob) {$yes=1;}else {$yes=0;} | |
129 if ($yes) { | |
130 print OUT $entry ; | |
131 $count++; | |
132 } | |
133 | |
134 if ($count >=$chipCount ) { | |
135 last; | |
136 } | |
137 } | |
138 if ($count <$chipCount) { #do it again! | |
139 | |
140 for my $entry (sort {$hash{$b}<=>$hash{$a}} keys %hash) { | |
141 my $yes = rand(); | |
142 if ($yes<=$prob) {$yes=1;}else {$yes=0;} | |
143 if ($yes) { | |
144 print OUT $entry ; | |
145 $count++; | |
146 } | |
147 | |
148 if ($count >=$chipCount ) { | |
149 last; | |
150 } | |
151 } | |
152 | |
153 } | |
154 | |
155 } else { | |
156 for my $entry (keys %hash) { | |
157 print OUT $entry; | |
158 } | |
159 } | |
160 | |
161 close OUT; | |
162 | |
163 } |