annotate 2.4/src/ReadCluster.pl @ 0:00b9898b8510 draft

Uploaded
author plus91-technologies-pvt-ltd
date Wed, 04 Jun 2014 03:41:27 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
1 #!/usr/bin/perl
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
2
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
3 =head1 NAME
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
4 ReadCluster.pl
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
5
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
6 =head1 SYNOPSIS
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
7
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
8 USAGE: ReadCluster.pl --input input_sam_file --output output_prefix [--threshold 10000 --minClusterSize 4]
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
9
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
10 =head1 OPTIONS
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
11
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
12 B<--input,-i>
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
13 Input file
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
14
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
15 B<--output,-o>
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
16 output prefix
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
17
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
18 B<--window, -w>
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
19 Window size
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
20
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
21 B<--minClusterSize, -m>
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
22 Min size of cluster
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
23
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
24 B<--help,-h>
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
25 This help message
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
26
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
27 =head1 DESCRIPTION
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
28
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
29
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
30 =head1 INPUT
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
31
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
32
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
33 =head1 OUTPUT
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
34
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
35
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
36 =head1 CONTACT
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
37 Jaysheel D. Bhavsar @ bjaysheel[at]gmail[dot]com
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
38
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
39
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
40 ==head1 EXAMPLE
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
41 ReadCluster.pl --input=filename.sam --window=10000 --output=PREFIX
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
42
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
43 =cut
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
44
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
45 use strict;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
46 use warnings;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
47 use Data::Dumper;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
48 use DBI;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
49 use Pod::Usage;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
50 use Scalar::Util qw(looks_like_number);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
51 use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
52
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
53 my %options = ();
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
54 my $results = GetOptions (\%options,
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
55 'input|i=s',
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
56 'output|o=s',
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
57 'window|w=s',
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
58 'minClusterSize|m=s',
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
59 'help|h') || pod2usage();
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
60
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
61 ## display documentation
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
62 if( $options{'help'} ){
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
63 pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} );
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
64 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
65 #############################################################################
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
66 ## make sure everything passed was peachy
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
67 &check_parameters(\%options);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
68
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
69 my $r1_start = 0;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
70 my $r2_start = 0;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
71 my $r1_end = $r1_start + $options{window};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
72 my $r2_end = $r2_start + $options{window};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
73 my $r1_chr = "";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
74 my $r2_chr = "";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
75
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
76 my @cluster = ();
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
77
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
78 open (FHD, "<", $options{input}) or die "Cound not open file $options{input}\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
79 open (INTRA, ">", $options{output} . ".intra.sam") or die "Cound not open file $options{output}.intra.sam\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
80 open (INTER, ">", $options{output} . ".inter.sam") or die "Cound not open file $options{output}.inter.sam\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
81
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
82 while (<FHD>){
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
83 chomp $_;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
84
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
85 #skip processing lines starting with @ just print to output file.
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
86 if ($_ =~ /^@/){
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
87 print INTRA $_."\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
88 print INTER $_."\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
89 next;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
90 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
91 #print "$_\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
92 check_sequence($_);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
93 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
94
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
95 close(FHD);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
96 close(INTRA);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
97 close(INTER);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
98
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
99 exit(0);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
100
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
101 #############################################################################
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
102 sub check_parameters {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
103 my $options = shift;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
104
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
105 my @required = ("input", "output");
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
106
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
107 foreach my $key (@required) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
108 unless ($options{$key}) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
109 print STDERR "ARG: $key is required\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
110 pod2usage({-exitval => 2, -message => "error message", -verbose => 1, -output => \*STDERR});
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
111 exit(-1);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
112 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
113 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
114
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
115 unless($options{window}) { $options{window} = 10000; }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
116 unless($options{minClusterSize}) { $options{minClusterSize} = 4; }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
117 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
118
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
119 #############################################################################
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
120 sub check_sequence {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
121 my $line = shift;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
122
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
123 my @data = split(/\t/, $line);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
124
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
125 ## check if mates are within the window.
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
126 if ((inWindow($data[3], 1)) && (inWindow($data[7], 2)) &&
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
127 ($r1_chr =~ /$data[2]/) && ($r2_chr =~ /$data[6]/)) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
128
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
129 ## if minClusterSize is reached output
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
130 if (scalar(@cluster) >= $options{minClusterSize}) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
131
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
132 ## if chr are the same then print intra-chr else inter-chr
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
133 if ($data[6] =~ /=/) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
134 print INTRA $line."\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
135 } else {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
136 print INTER $line."\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
137 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
138 } else {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
139 push @cluster, $line;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
140 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
141 } else {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
142
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
143 if (scalar(@cluster) >= $options{minClusterSize}) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
144 dumpCluster(@cluster);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
145 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
146
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
147 @cluster = ();
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
148 $r1_start = $data[3];
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
149 $r2_start = $data[7];
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
150 $r1_end = $r1_start + $options{window};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
151 $r2_end = $r2_start + $options{window};
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
152 $r1_chr = $data[2];
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
153 $r2_chr = $data[6];
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
154 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
155 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
156
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
157 #############################################################################
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
158 sub inWindow {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
159 my $coord = shift;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
160 my $read = shift;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
161
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
162 my $start = 0;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
163 my $end = 0;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
164
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
165 if ($read == 1) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
166 $start = $r1_start;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
167 $end = $r1_end;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
168 } else {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
169 $start = $r2_start;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
170 $end = $r2_end;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
171 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
172
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
173 if (($coord > $start) && ($coord < $end)){
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
174 return 1;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
175 } else { return 0; }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
176 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
177
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
178 #############################################################################
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
179 sub dumpCluster {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
180 my @cluster = shift;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
181
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
182 foreach (@cluster){
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
183 my @data = split(/\t/, $_);
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
184
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
185 if ($data[6] =~ /=/) {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
186 print INTRA $_."\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
187 } else {
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
188 print INTER $_."\n";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
189 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
190 }
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
191 }