annotate ccat_int2bed.pl @ 3:313e20192e7e draft default tip

Uploaded
author jbrayet
date Wed, 10 Feb 2016 04:49:33 -0500
parents 3e5c8eba6a73
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
1 #!/usr/bin/perl
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
2
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
3 #filter out duplicates from SAMPLE (optional) and create a control dataset w/o duplicates with the same number of reads as in the SAMPLE
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
4
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
5 use strict;
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
6 use warnings;
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
7 use diagnostics;
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
8
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
# Help text printed when the script is run without arguments or with -h.
# $0 is interpolated, so the text begins with the name the script was
# invoked as. Every flag accepted by the argument parser is listed here,
# including -g, which the parser reads into $chromLengthsFile.
my $usage = qq{
$0

-----------------------------
mandatory parameters:

-f peaks
-t min peak height
-o output file

-----------------------------
optional parameters:
-n name
-g chromosome lengths file
};

# With no arguments at all there is nothing to convert: show the help text
# and leave with a success status.
if (scalar(@ARGV) == 0) {
    print $usage;
    exit(0);
}
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
29
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
## mandatory arguments

my $filename     = "";    # -f : peak file to convert
my $output_fname = "";    # -o : file the track is written to

my $minPeakH = 0;         # -t : minimum peak height (not enforced below)

## optional arguments

my $chromLengthsFile = "";              # -g : chromosome lengths file
my $expName          = "User Track";    # -n : track name / description

## parse command line arguments

# Each value-taking flag maps to the variable that receives its value.
my %target_for = (
    '-f' => \$filename,
    '-t' => \$minPeakH,
    '-o' => \$output_fname,
    '-g' => \$chromLengthsFile,
    '-n' => \$expName,
);

while (scalar(@ARGV) > 0) {
    my $this_arg = shift @ARGV;
    if ($this_arg eq '-h') {
        print "$usage\n";
        exit;
    }
    elsif (exists $target_for{$this_arg}) {
        my $value = shift @ARGV;
        if (defined $value) {
            ${ $target_for{$this_arg} } = $value;
        }
        else {
            # A flag given as the very last argument has no value. Keep the
            # default instead of storing undef, which would trigger
            # "uninitialized value" warnings in every later use.
            warn "missing value for $this_arg; keeping default\n";
        }
    }
    elsif ($this_arg =~ m/^-/) {
        # Unknown flags are reported and otherwise ignored.
        print "unknown flag: $this_arg\n";
    }
    # Anything else (a stray non-flag word) is silently skipped.
}

if ($filename eq "") {
    die "you should specify chip file\n";
}
if ($output_fname eq "") {
    die "you should specify output filename\n";
}

# Lower the threshold by 0.5 unless the supplied value already contains the
# text "0.5" (note: this is a substring match, so e.g. "10.5" is also left
# untouched).
# NOTE(review): presumably this makes integer peak heights compare
# inclusively against the threshold - confirm the intent.
$minPeakH -= 0.5 unless ($minPeakH =~ m/0\.5/);
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
64
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
# Read chromosome lengths if a lengths file was provided.
# Every line containing "chr<non-space chars>", one whitespace character and
# a run of digits adds an entry to %max (name => digits); other lines are
# ignored. %max stays empty when no file was given.
my %max;
if ($chromLengthsFile ne "") {
    # Three-argument open with low-precedence "or": in the previous
    # 'open FILE, "< $file " || die ...' form the "||" was applied to the
    # (always true) string, so a failed open was never reported.
    open(my $lengths_fh, '<', $chromLengthsFile)
        or die "$chromLengthsFile : $!\n";
    while (my $line = <$lengths_fh>) {
        chomp $line;
        if ($line =~ /(chr\S+)\s(\d+)/) {
            $max{$1} = $2;
        }
    }
    close $lengths_fh;
}
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
77
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
######

print "\n-----------------\n\n";

# Convert the peak file into a 12-column track file.
# Input lines are tab-separated; lines containing "max" or "track" and lines
# starting with "#" are skipped as headers/comments.
# NOTE(review): the column meaning (field 1 used as the name column, field 4
# as the score/height) presumably follows the CCAT peak format - confirm.

# Three-argument open with "or die": with the former '... || die' form the
# "||" bound to the file-name string and open failures went unnoticed.
open(my $peaks_fh, '<', $filename)     or die "$filename : $!\n";
open(my $out_fh,   '>', $output_fname) or die "$output_fname: $!\n";
print {$out_fh} "track name=\'$expName\' description=\'$expName\'\n";

my $count  = 0;    # data lines read
my $scount = 0;    # lines that passed the height threshold

PEAK:
while (my $line = <$peaks_fh>) {
    chomp $line;
    next PEAK if $line =~ /max/;
    next PEAK if $line =~ /track/;
    next PEAK if $line =~ /^\#/;

    my @fields = split(/\t/, $line);

    # Blank or truncated lines have no height column; skip them instead of
    # counting them and emitting a row of empty values.
    next PEAK if @fields < 5;

    $count++;
    next PEAK unless $fields[4] >= $minPeakH;
    $scount++;

    $fields[0] = "chr" . $fields[0] unless ($fields[0] =~ m/chr/);

    # Clip the three coordinates to the chromosome length when one is known.
    # Chromosomes absent from the lengths table (or no table at all) are
    # left untouched; previously an unknown chromosome made min() return
    # undef and blanked the coordinates.
    my $maxV = $max{ $fields[0] };
    if (defined $maxV) {
        for my $idx (1 .. 3) {
            $fields[$idx] = min($fields[$idx], $maxV);
        }
    }

    # The "\n" is passed as the last join() element, so every row ends with
    # a tab before the newline. Kept as-is so the output stays byte-for-byte
    # compatible with files produced by earlier runs.
    print {$out_fh} join("\t",
        $fields[0], $fields[2], $fields[3], $fields[1], $fields[4], "+",
        $fields[2], $fields[3], "255,120,11", "1",
        $fields[3] - $fields[2], 0, "\n");
}

close $peaks_fh;
# Buffered write errors (e.g. disk full) only surface at close time.
close($out_fh) or die "$output_fname: $!\n";
print "read: $count peaks; selected: $scount\n";
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
118
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
# Return the numerically smaller of two values.
#   min($first, $second)
# When both compare equal, the second argument is returned.
# The locals are deliberately not named $a/$b: those are Perl's special
# sort variables and should not be shadowed with "my".
sub min {
    my ($first, $second) = @_;
    return $first < $second ? $first : $second;
}
3e5c8eba6a73 Uploaded
jbrayet
parents:
diff changeset
126