annotate 2.4/src/cluster.pair.pl @ 0:00b9898b8510 draft

Uploaded
author plus91-technologies-pvt-ltd
date Wed, 04 Jun 2014 03:41:27 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
1 #!/usr/bin/perl
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
2 use strict;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
3 use POSIX;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
4
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
5 my $usage = "cluster.pair.pl maxdist\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
6 my $maxdist = shift or die $usage;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
7
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
8 my %count;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
9
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
10 while (<STDIN>){
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
11 chomp;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
12 my ($sample, $chrstart, $start, $chrend, $end) = split /\t/;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
13 my $nstart = floor ($start/$maxdist);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
14 my $nend = floor ($end/$maxdist);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
15 my $coord = {start=>$start, end=>$end};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
16
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
17 push @{$count{$chrstart}->{$nstart}->{$chrend}->{$nend}->{$sample}}, $coord;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
18 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
19
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
20 print_groups (\%count);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
21
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
22 sub print_groups {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
23 my ($rcount) = @_;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
24 my %count = %{$rcount};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
25
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
26 foreach my $chrstart (sort {$a<=>$b} keys %count) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
27 foreach my $posstart (sort {$a<=>$b} keys %{$count{$chrstart}}) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
28 my %fcoord = %{$count{$chrstart}->{$posstart}};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
29
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
30 foreach my $chrend (sort {$a<=>$b} keys %fcoord) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
31 foreach my $posend (sort {$a<=>$b} keys %{$fcoord{$chrend}}){
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
32 my @nsamples = sort {$a cmp $b} (keys %{$fcoord{$chrend}->{$posend}});
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
33
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
34 my $cpos = $fcoord{$chrend}->{$posend};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
35
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
36 my @coords;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
37 my $totnum=0;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
38
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
39 foreach my $sample (@nsamples) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
40 my ($num, $avgx, $avgy) = calc_moments(@{$cpos->{$sample}});
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
41 push (@coords, {start=>$avgx, end=>$avgy});
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
42 $totnum+=$num;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
43 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
44
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
45 my ($num, $avgx, $avgy) = calc_moments(@coords);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
46
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
47 print $chrstart."\t".$avgx."\t".$chrend."\t".$avgy ."\t".$num."\t".$totnum."\t" ;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
48
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
49 print $_."\t" foreach (@nsamples);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
50 print "\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
51 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
52 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
53 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
54 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
55 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
56
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
57 sub calc_moments {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
58 my (@pos) = @_;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
59
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
60 my ($num, $sumx, $sumy) = (0,0,0);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
61 foreach my $cpos (@pos) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
62 $num++;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
63 $sumx+=$cpos->{start};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
64 $sumy+=$cpos->{end};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
65 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
66 my $avgx = sprintf ("%d", $sumx/$num);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
67 my $avgy = sprintf ("%d", $sumy/$num);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
68
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
69 return ($num, $avgx, $avgy);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
70 }