annotate 2.4/library/LevD.pm @ 0:00b9898b8510 draft

Uploaded
author plus91-technologies-pvt-ltd
date Wed, 04 Jun 2014 03:41:27 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
1 package LevD;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
2
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
3 use lib "/data2/bsi/reference/softsearch/lib/perl5";
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
4 use strict;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
5 use warnings;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
6 use Data::Dumper;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
7 use String::Approx 'adist';
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
8 use String::Approx 'adistr';
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
9 use String::Approx 'aindex';
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
10
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
11 my $WINDOW_SIZE = 100;
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
12
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#### Constructor.
####   $class - package name to bless the new object into.
#### Any further arguments are accepted but ignored (nothing in the
#### constructor reads them).
#### Returns the blessed hash reference after init() has populated its
#### default fields.
sub new {
    my $class = shift;

    #### two-argument bless so subclasses get an object of their own class
    my $self = bless {}, $class;
    $self->init();

    return $self;
}
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
22
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#### Reset the result fields of the object to their defaults.
####   index              - set to 0
####   relative_edit_dist - set to 0
####   edit_dist          - set to 0
#### Returns 0 (the value of the last field that was reset).
sub init {
    my $self = shift;

    #### default values.
    $self->{$_} = 0 for qw(index relative_edit_dist edit_dist);

    return $self->{edit_dist};
}
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
31
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#### Approximate-match a clipped sequence against a region of a reference.
####   $clip  - sequence to search for (used as the String::Approx pattern)
####   $chr   - chromosome / contig name of the target region
####   $start - first coordinate of the region, passed verbatim to samtools
####   $stop  - last coordinate of the region, passed verbatim to samtools
####   $ref   - path to the reference file given to "samtools faidx"
#### The region "$chr:$start-$stop" is extracted with "samtools faidx" and
#### the results of adistr(), adist() and aindex() are stored in
#### $self->{relative_edit_dist}, $self->{edit_dist} and $self->{index}.
#### Returns the value stored in $self->{index}.
#### Dies if the reference file is missing/empty or if samtools fails.
sub search {
    my ($self, $clip, $chr, $start, $stop, $ref) = @_;

    #### -s is only true for an existing file with non-zero size; check
    #### definedness first so an omitted argument gives a clean error
    #### instead of "uninitialized value" warnings.
    if (!defined $ref || !-s $ref) {
        my $shown = defined $ref ? $ref : '(undef)';
        die "ERROR: Reference file $shown not found or empty\n";
    }

    #### the command goes through the shell (see _run_system_cmd), so
    #### single-quote each argument: paths containing spaces or shell
    #### metacharacters must reach samtools as one literal word.
    my $shell_quote = sub {
        my ($word) = @_;
        $word =~ s/'/'\\''/g;
        return "'$word'";
    };

    #### extract seq from reference file.
    my $target = $chr . ":" . $start . "-" . $stop;
    my $cmd    = join(" ", "samtools", "faidx",
                      $shell_quote->($ref), $shell_quote->($target));

    my @output = $self->_run_system_cmd($cmd);

    #### depending on ref file format seq could be on multiple lines
    #### concatenate all except for the header in one line.
    #### e.g:
    #### >chr1:8222999-8223099
    #### GGTGCAATCATAGCTCACTAAGCTTCAACCTCAAGAGATCCTCCCACCTCAGCCTCCCAG
    #### GTAGCTGGGACTACAGGCAAATGCCATGACACCTAGCTAAT
    #### (an empty @output yields an empty slice, hence an empty $seq)
    my $seq = join("", @output[1 .. $#output]);

    #### remove line terminators (LF, and CR in case of CRLF output)
    $seq =~ s/[\r\n]//g;

    #### NOTE(review): matching here is case-sensitive as far as this code
    #### shows; if the reference is soft-masked (lower case) the clip may
    #### need the same case - confirm against callers.

    #### find number of mismatches and start index
    #### of clip to be searched against target seq.
    $self->{relative_edit_dist} = adistr($clip, $seq);
    $self->{edit_dist}          = adist($clip, $seq);
    $self->{index}              = aindex($clip, $seq);

    return $self->{index};
}
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
62
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
#### Run a shell command and capture its output.
####   $cmd - command line, executed through the shell with stderr
####          redirected into stdout ("2>&1" is appended).
#### Returns the captured output lines (line terminators kept).
#### Dies with a message starting "Error executing command $cmd: " when the
#### command cannot be started, is killed by a signal, or exits non-zero;
#### the message states which of these happened, followed by whatever
#### output was captured.
sub _run_system_cmd {
    my ($self, $cmd) = @_;

    my @cmd_output = qx{$cmd 2>&1};

    #### capture both immediately: later statements may overwrite them
    my $status   = $?;
    my $os_error = "$!";

    return @cmd_output if $status == 0;

    #### decode the wait status: -1 means the command could not be run,
    #### the low 7 bits hold a terminating signal, and the exit code is
    #### in the high byte (status >> 8).
    my $reason;
    if ($status == -1) {
        $reason = "failed to execute: $os_error";
    }
    elsif ($status & 127) {
        $reason = sprintf("died with signal %d", $status & 127);
    }
    else {
        $reason = sprintf("exited with status %d", $status >> 8);
    }

    die "Error executing command $cmd: $reason\n" . join("", @cmd_output);
}
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
79
00b9898b8510 Uploaded
plus91-technologies-pvt-ltd
parents:
diff changeset
80 1;