annotate compute_motifs_frequency.pl @ 0:d66f925bfbeb draft

Uploaded tool tarball.
author devteam
date Tue, 20 Aug 2013 09:31:57 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d66f925bfbeb Uploaded tool tarball.
devteam
parents:
diff changeset
#!/usr/bin/perl -w

# A program to compute the frequency of each motif at each window in both the
# upstream and downstream sequences flanking indels in a chromosome/genome.
#
# Arguments (all five are required, in this order):
#   1. TABULAR input file with the motifs, one line per motif: the first column
#      is the motif name and the second column is the motif sequence. The
#      sequence is used as a case-insensitive Perl regular expression.
#   2. TABULAR input file with the sequences flanking the indels, one line per
#      indel: the fourth column is the upstream flanking sequence and the fifth
#      column is the downstream flanking sequence.
#   3. An integer window size according to which both flanking sequences are
#      divided. A size of 0 means that each flanking sequence is one window.
#   4. TABULAR output file receiving, one line per indel, the upstream windows,
#      the literal column "indel", and the downstream windows.
#   5. TABULAR output file receiving, one line per motif, the motif name
#      followed by one counter per window position (with an "indel" column
#      between the upstream and downstream counters). A counter is the number
#      of indels whose window at that position contains the motif at least once.
#
# Every cell written to the output files is followed by a tab, including the
# last cell of a line.

use strict;
use warnings;

# Split one flanking sequence into consecutive windows of $size characters.
#   $flankingSequence  - the sequence to split
#   $size              - the window size; 0 returns the whole sequence as a
#                        single window
#   $discardFromStart  - when the length is not a multiple of $size the
#                        remainder is discarded: from the start of the
#                        sequence if true (upstream flank), from its end
#                        otherwise (downstream flank), so that the bases
#                        next to the indel are always kept
# Returns the list of windows (empty if the sequence is shorter than $size).
sub split_into_windows {
    my ($flankingSequence, $size, $discardFromStart) = @_;

    return ($flankingSequence) if $size == 0;

    my $flankLength             = length($flankingSequence);
    my $windowsNumber           = int($flankLength / $size);
    my $discardedSequenceLength = $flankLength % $size;

    if ($discardedSequenceLength != 0) {
        $flankingSequence = $discardFromStart
            ? substr($flankingSequence, $discardedSequenceLength)
            : substr($flankingSequence, 0, $flankLength - $discardedSequenceLength);
    }

    return map { substr($flankingSequence, $_ * $size, $size) } 0 .. $windowsNumber - 1;
}

# Format a list of cells as one TABULAR line; every cell, including the last
# one, is followed by a tab (the layout the tool has always produced).
sub tabular_row {
    my @cells = @_;
    return join("", map { $_ . "\t" } @cells) . "\n";
}

# Increment, for every motif found in $window, the counter stored under
# $offset (the signed window distance from the indel) in that motif's hash.
sub count_motifs_in_window {
    my ($window, $offset, $motifPatterns, $motifCounters) = @_;

    for my $motifNumber (0 .. $#{$motifPatterns}) {
        if ($window =~ $motifPatterns->[$motifNumber]) {
            $motifCounters->[$motifNumber]{$offset}++;
        }
    }
    return;
}

# check to make sure having correct files
my $usage = "usage: compute_motifs_frequency.pl [TABULAR.in] [TABULAR.in] [windowSize] [TABULAR.out] [TABULAR.out]\n";
die $usage unless @ARGV == 5;

# get the input and output arguments
my ($motifsInputFile,
    $indelFlankingSequencesInputFile,
    $windowSize,
    $indelFlankingSequencesWindowsOutputFile,
    $motifFrequenciesOutputFile) = @ARGV;

# a non-numeric or negative window size would silently produce garbage windows
die "windowSize must be a non-negative integer, got '$windowSize'\n" . $usage
    unless $windowSize =~ /\A\d+\z/;
$windowSize = int($windowSize);

# open the input and output files
open(my $motifsInput, "<", $motifsInputFile)
    or die("Could not open file $motifsInputFile: $!\n");
open(my $flankingSequencesInput, "<", $indelFlankingSequencesInputFile)
    or die("Could not open file $indelFlankingSequencesInputFile: $!\n");
open(my $windowsOutput, ">", $indelFlankingSequencesWindowsOutputFile)
    or die("Could not open file $indelFlankingSequencesWindowsOutputFile: $!\n");
open(my $frequenciesOutput, ">", $motifFrequenciesOutputFile)
    or die("Could not open file $motifFrequenciesOutputFile: $!\n");

# read the motifs: names and precompiled case-insensitive patterns are kept in
# two parallel arrays
my @motifNamesArray = ();
my @motifPatterns   = ();

while (my $motif = <$motifsInput>) {
    chomp($motif);
    next if $motif eq "";

    # split the motif data into its name and its sequence
    my ($motifName, $motifSequence) = split(/\t/, $motif);

    # an empty pattern would not mean "match nothing" in Perl, so a motif
    # without a sequence cannot be counted meaningfully
    if (!defined($motifSequence) || $motifSequence eq "") {
        warn("Skipping line $. of $motifsInputFile: no motif sequence found\n");
        next;
    }

    push @motifNamesArray, $motifName;
    push @motifPatterns, qr/$motifSequence/i;
}
close($motifsInput);

# one hash per motif, mapping the signed window distance from the indel
# (-1 = upstream window next to the indel, 1 = downstream window next to the
# indel) to the number of indels whose window at that distance has the motif
my @motifCounters = map { {} } @motifNamesArray;

# the widest layout seen so far, used to lay out the frequencies table
my $totalNumberOfWindowsInUpstreamSequence   = 0;
my $totalNumberOfWindowsInDownstreamSequence = 0;
my $totalIndelsNumber                        = 0;

# iterate through the indels: write the windows of each one to the first
# output file and count the motifs found in each window
while (my $sequence = <$flankingSequencesInput>) {
    chomp($sequence);
    next if $sequence eq "";
    $totalIndelsNumber++;

    my @indelAndSequenceArray = split(/\t/, $sequence);

    # get the upstream and downstream flanking sequences
    my $upstreamFlankingSequence   = $indelAndSequenceArray[3];
    my $downstreamFlankingSequence = $indelAndSequenceArray[4];

    if (!defined($upstreamFlankingSequence) || !defined($downstreamFlankingSequence)) {
        warn("Line $. of $indelFlankingSequencesInputFile has fewer than 5 columns; missing flanking sequences are treated as empty\n");
        $upstreamFlankingSequence   = "" unless defined($upstreamFlankingSequence);
        $downstreamFlankingSequence = "" unless defined($downstreamFlankingSequence);
    }

    # split both flanking sequences into windows; the upstream remainder is
    # discarded from the start and the downstream remainder from the end
    my @upstreamWindows   = split_into_windows($upstreamFlankingSequence, $windowSize, 1);
    my @downstreamWindows = split_into_windows($downstreamFlankingSequence, $windowSize, 0);

    # the windows line: upstream windows, a column representing the indel,
    # downstream windows
    print {$windowsOutput} tabular_row(@upstreamWindows, "indel", @downstreamWindows);

    # count the motifs, keyed by the distance of each window from the indel
    for my $windowNumber (0 .. $#upstreamWindows) {
        count_motifs_in_window($upstreamWindows[$windowNumber],
                               $windowNumber - @upstreamWindows,
                               \@motifPatterns, \@motifCounters);
    }
    for my $windowNumber (0 .. $#downstreamWindows) {
        count_motifs_in_window($downstreamWindows[$windowNumber],
                               $windowNumber + 1,
                               \@motifPatterns, \@motifCounters);
    }

    $totalNumberOfWindowsInUpstreamSequence = @upstreamWindows
        if @upstreamWindows > $totalNumberOfWindowsInUpstreamSequence;
    $totalNumberOfWindowsInDownstreamSequence = @downstreamWindows
        if @downstreamWindows > $totalNumberOfWindowsInDownstreamSequence;
}
close($flankingSequencesInput);
close($windowsOutput)
    or die("Could not write file $indelFlankingSequencesWindowsOutputFile: $!\n");

# store the motif counters values in the second output file
for my $motifNumber (0 .. $#motifNamesArray) {
    my @row = ($motifNamesArray[$motifNumber]);

    # without any indel only the motif name is written, as before
    if ($totalIndelsNumber > 0) {
        my $counters = $motifCounters[$motifNumber];

        push @row, map { $counters->{$_} || 0 } (-$totalNumberOfWindowsInUpstreamSequence .. -1);
        push @row, "indel";
        push @row, map { $counters->{$_} || 0 } (1 .. $totalNumberOfWindowsInDownstreamSequence);
    }

    print {$frequenciesOutput} tabular_row(@row);
}

close($frequenciesOutput)
    or die("Could not write file $motifFrequenciesOutputFile: $!\n");