annotate readshifter.pl @ 0:d42f4d78c85e draft

Uploaded
author messersc
date Wed, 17 Dec 2014 10:40:23 -0500
parents
children 243f75d0ed6e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
#!/usr/bin/perl
##############################################
#### Shifts minus strand reads by read length
#### Perl script
#############################################
#
# Usage: readshifter.pl <bed_file> <shift_size> <read_length>
#
# Input:  a tab-separated file; the first column is a read start position and
#         everything after the first tab must be the strand, '+' or '-'.
# Output: "start<TAB>strand" lines on STDOUT.
#           * plus-strand lines are passed through unchanged;
#           * minus-strand starts become start - shift_size + read_length;
#           * lines whose resulting start is negative are dropped.
# Dies:   on missing or non-numeric arguments, if the file cannot be opened,
#         or on the first line whose strand is neither '+' nor '-'.

use strict;
use warnings;
use feature qw(say);
use Scalar::Util qw(looks_like_number);


# ==================
# Parsing Arguments
# ==================

# All three positional arguments are required; extra arguments are ignored.
@ARGV >= 3
    or die "Usage: $0 <bed_file> <shift_size> <read_length>\n";

my ($bed_file, $shift_size, $read_length) = @ARGV;

# Both offsets take part in arithmetic below, so refuse anything Perl would
# otherwise silently treat as 0.
for my $arg ([shift_size => $shift_size], [read_length => $read_length]) {
    my ($name, $value) = @{$arg};
    looks_like_number($value)
        or die "The $name argument must be a number, but I got '$value'\n";
}

#=======================> DONE!



# ========================================
# Parse the bed file and shift the reads
# ========================================

# Three-argument open with a lexical handle: the path is never interpreted as
# a mode or a pipe, and the special DATA handle is left alone.
open(my $bed_fh, '<', $bed_file)
    or die("Can't open the bed file, probably you gave me the wrong path! ($bed_file: $!)");

# Stream the file line by line so memory use does not depend on file size.
while (my $line = <$bed_fh>) {
    # Limit of 2: everything after the first tab is taken as the strand.
    my ($start, $strand) = split(/\t/, $line, 2);

    # Strip a trailing LF or CRLF; $strand is undefined when the line
    # contained no tab at all.
    $strand =~ s/\015?\012?$// if defined $strand;

    # bad format (Perl appends the input line number to this message)
    if (!defined $strand || ($strand ne '+' && $strand ne '-')) {
        die("It appears the bed file is not formatted properly. Specifically, I expect to find either + or - in the second column, but I found something else!");
    }

    # minus strand: move the start by (read_length - shift_size);
    # plus strand: nothing to do.
    if ($strand eq '-') {
        $start = $start - $shift_size + $read_length;
    }

    # Reads shifted to a negative coordinate are dropped.
    if ($start >= 0) {
        #Now write the new line
        say join "\t", $start, $strand;
    }
}

close($bed_fh)
    or die("Can't close the bed file ($bed_file: $!)");
#=======================> DONE!