Mercurial > repos > plus91-technologies-pvt-ltd > ss_test_tool
comparison 2.4/src/ReadCluster.pl @ 0:00b9898b8510 draft
Uploaded
| author | plus91-technologies-pvt-ltd |
|---|---|
| date | Wed, 04 Jun 2014 03:41:27 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:00b9898b8510 |
|---|---|
| 1 #!/usr/bin/perl | |
| 2 | |
| 3 =head1 NAME | |
| 4 ReadCluster.pl | |
| 5 | |
| 6 =head1 SYNOPSIS | |
| 7 | |
| 8 USAGE: ReadCluster.pl --input input_sam_file --output output_prefix [--threshold 10000 --minClusterSize 4] | |
| 9 | |
| 10 =head1 OPTIONS | |
| 11 | |
| 12 B<--input,-i> | |
| 13 Input file | |
| 14 | |
| 15 B<--output,-o> | |
| 16 output prefix | |
| 17 | |
| 18 B<--window, -w> | |
| 19 Window size | |
| 20 | |
| 21 B<--minClusterSize, -m> | |
| 22 Min size of cluster | |
| 23 | |
| 24 B<--help,-h> | |
| 25 This help message | |
| 26 | |
| 27 =head1 DESCRIPTION | |
| 28 | |
| 29 | |
| 30 =head1 INPUT | |
| 31 | |
| 32 | |
| 33 =head1 OUTPUT | |
| 34 | |
| 35 | |
| 36 =head1 CONTACT | |
| 37 Jaysheel D. Bhavsar @ bjaysheel[at]gmail[dot]com | |
| 38 | |
| 39 | |
| 40 ==head1 EXAMPLE | |
| 41 ReadCluster.pl --input=filename.sam --window=10000 --output=PREFIX | |
| 42 | |
| 43 =cut | |
| 44 | |
| 45 use strict; | |
| 46 use warnings; | |
| 47 use Data::Dumper; | |
| 48 use DBI; | |
| 49 use Pod::Usage; | |
| 50 use Scalar::Util qw(looks_like_number); | |
| 51 use Getopt::Long qw(:config no_ignore_case no_auto_abbrev pass_through); | |
| 52 | |
| 53 my %options = (); | |
| 54 my $results = GetOptions (\%options, | |
| 55 'input|i=s', | |
| 56 'output|o=s', | |
| 57 'window|w=s', | |
| 58 'minClusterSize|m=s', | |
| 59 'help|h') || pod2usage(); | |
| 60 | |
| 61 ## display documentation | |
| 62 if( $options{'help'} ){ | |
| 63 pod2usage( {-exitval => 0, -verbose => 2, -output => \*STDERR} ); | |
| 64 } | |
| 65 ############################################################################# | |
| 66 ## make sure everything passed was peachy | |
| 67 &check_parameters(\%options); | |
| 68 | |
| 69 my $r1_start = 0; | |
| 70 my $r2_start = 0; | |
| 71 my $r1_end = $r1_start + $options{window}; | |
| 72 my $r2_end = $r2_start + $options{window}; | |
| 73 my $r1_chr = ""; | |
| 74 my $r2_chr = ""; | |
| 75 | |
| 76 my @cluster = (); | |
| 77 | |
| 78 open (FHD, "<", $options{input}) or die "Cound not open file $options{input}\n"; | |
| 79 open (INTRA, ">", $options{output} . ".intra.sam") or die "Cound not open file $options{output}.intra.sam\n"; | |
| 80 open (INTER, ">", $options{output} . ".inter.sam") or die "Cound not open file $options{output}.inter.sam\n"; | |
| 81 | |
| 82 while (<FHD>){ | |
| 83 chomp $_; | |
| 84 | |
| 85 #skip processing lines starting with @ just print to output file. | |
| 86 if ($_ =~ /^@/){ | |
| 87 print INTRA $_."\n"; | |
| 88 print INTER $_."\n"; | |
| 89 next; | |
| 90 } | |
| 91 #print "$_\n"; | |
| 92 check_sequence($_); | |
| 93 } | |
| 94 | |
| 95 close(FHD); | |
| 96 close(INTRA); | |
| 97 close(INTER); | |
| 98 | |
| 99 exit(0); | |
| 100 | |
| 101 ############################################################################# | |
| 102 sub check_parameters { | |
| 103 my $options = shift; | |
| 104 | |
| 105 my @required = ("input", "output"); | |
| 106 | |
| 107 foreach my $key (@required) { | |
| 108 unless ($options{$key}) { | |
| 109 print STDERR "ARG: $key is required\n"; | |
| 110 pod2usage({-exitval => 2, -message => "error message", -verbose => 1, -output => \*STDERR}); | |
| 111 exit(-1); | |
| 112 } | |
| 113 } | |
| 114 | |
| 115 unless($options{window}) { $options{window} = 10000; } | |
| 116 unless($options{minClusterSize}) { $options{minClusterSize} = 4; } | |
| 117 } | |
| 118 | |
| 119 ############################################################################# | |
| 120 sub check_sequence { | |
| 121 my $line = shift; | |
| 122 | |
| 123 my @data = split(/\t/, $line); | |
| 124 | |
| 125 ## check if mates are within the window. | |
| 126 if ((inWindow($data[3], 1)) && (inWindow($data[7], 2)) && | |
| 127 ($r1_chr =~ /$data[2]/) && ($r2_chr =~ /$data[6]/)) { | |
| 128 | |
| 129 ## if minClusterSize is reached output | |
| 130 if (scalar(@cluster) >= $options{minClusterSize}) { | |
| 131 | |
| 132 ## if chr are the same then print intra-chr else inter-chr | |
| 133 if ($data[6] =~ /=/) { | |
| 134 print INTRA $line."\n"; | |
| 135 } else { | |
| 136 print INTER $line."\n"; | |
| 137 } | |
| 138 } else { | |
| 139 push @cluster, $line; | |
| 140 } | |
| 141 } else { | |
| 142 | |
| 143 if (scalar(@cluster) >= $options{minClusterSize}) { | |
| 144 dumpCluster(@cluster); | |
| 145 } | |
| 146 | |
| 147 @cluster = (); | |
| 148 $r1_start = $data[3]; | |
| 149 $r2_start = $data[7]; | |
| 150 $r1_end = $r1_start + $options{window}; | |
| 151 $r2_end = $r2_start + $options{window}; | |
| 152 $r1_chr = $data[2]; | |
| 153 $r2_chr = $data[6]; | |
| 154 } | |
| 155 } | |
| 156 | |
| 157 ############################################################################# | |
| 158 sub inWindow { | |
| 159 my $coord = shift; | |
| 160 my $read = shift; | |
| 161 | |
| 162 my $start = 0; | |
| 163 my $end = 0; | |
| 164 | |
| 165 if ($read == 1) { | |
| 166 $start = $r1_start; | |
| 167 $end = $r1_end; | |
| 168 } else { | |
| 169 $start = $r2_start; | |
| 170 $end = $r2_end; | |
| 171 } | |
| 172 | |
| 173 if (($coord > $start) && ($coord < $end)){ | |
| 174 return 1; | |
| 175 } else { return 0; } | |
| 176 } | |
| 177 | |
| 178 ############################################################################# | |
| 179 sub dumpCluster { | |
| 180 my @cluster = shift; | |
| 181 | |
| 182 foreach (@cluster){ | |
| 183 my @data = split(/\t/, $_); | |
| 184 | |
| 185 if ($data[6] =~ /=/) { | |
| 186 print INTRA $_."\n"; | |
| 187 } else { | |
| 188 print INTER $_."\n"; | |
| 189 } | |
| 190 } | |
| 191 } |
