Mercurial > repos > devteam > delete_overlapping_indels
comparison delete_overlapping_indels.pl @ 0:fa09b51ed576 draft default tip
Uploaded tool tarball.
| author | devteam |
|---|---|
| date | Tue, 20 Aug 2013 09:37:51 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:fa09b51ed576 |
|---|---|
| 1 #!/usr/bin/perl -w | |
| 2 | |
| 3 # This program detects overlapping indels in a chromosome and keeps all non-overlapping indels. As for overlapping indels, | |
| 4 # the first encountered one is kept and all others are removed. It requires three inputs: | |
| 5 # The first input is a TABULAR format file containing coordinates of indels in blocks extracted from multi-alignment. | |
| 6 # The second input is an integer number representing the number of the column where indel start coordinates are stored in the input file. | |
| 7 # The third input is an integer number representing the number of the column where indel end coordinates are stored in the input file. | |
| 8 # The output is a TABULAR format file containing all non-overlapping indels in the input file, and the first encountered indel of overlapping ones. | |
| 9 # Note: The number of the first column is 1. | |
| 10 | |
| 11 use strict; | |
| 12 use warnings; | |
| 13 | |
| 14 #varaibles to handle information related to indels | |
| 15 my $indel1 = ""; | |
| 16 my $indel2 = ""; | |
| 17 my @indelArray1 = (); | |
| 18 my @indelArray2 = (); | |
| 19 my $lineCounter1 = 0; | |
| 20 my $lineCounter2 = 0; | |
| 21 my $totalNumberofNonOverlappingIndels = 0; | |
| 22 | |
| 23 # check to make sure having correct files | |
| 24 my $usage = "usage: delete_overlapping_indels.pl [TABULAR.in] [indelStartColumn] [indelEndColumn] [TABULAR.out]\n"; | |
| 25 die $usage unless @ARGV == 4; | |
| 26 | |
| 27 my $inputFile = $ARGV[0]; | |
| 28 my $indelStartColumn = $ARGV[1] - 1; | |
| 29 my $indelEndColumn = $ARGV[2] - 1; | |
| 30 my $outputFile = $ARGV[3]; | |
| 31 | |
| 32 #verifie column numbers | |
| 33 if ($indelStartColumn < 0 ){ | |
| 34 die ("The indel start column number is invalid \n"); | |
| 35 } | |
| 36 if ($indelEndColumn < 0 ){ | |
| 37 die ("The indel end column number is invalid \n"); | |
| 38 } | |
| 39 | |
| 40 #open the input and output files | |
| 41 open (INPUT, "<", $inputFile) || die ("Could not open file $inputFile \n"); | |
| 42 open (OUTPUT, ">", $outputFile) || die ("Could not open file $outputFile \n"); | |
| 43 | |
| 44 #store the input file in the array @rawData | |
| 45 my @indelsRawData = <INPUT>; | |
| 46 | |
| 47 #iterated through the indels of the input file | |
| 48 INDEL1: | |
| 49 foreach $indel1 (@indelsRawData){ | |
| 50 chomp ($indel1); | |
| 51 $lineCounter1++; | |
| 52 | |
| 53 #get the first indel | |
| 54 @indelArray1 = split(/\t/, $indel1); | |
| 55 | |
| 56 #our purpose is to detect overlapping indels and to store one copy of them only in the output file | |
| 57 #all other non-overlapping indels will stored in the output file also | |
| 58 | |
| 59 $lineCounter2 = 0; | |
| 60 | |
| 61 #iterated through the indels of the input file | |
| 62 INDEL2: | |
| 63 foreach $indel2 (@indelsRawData){ | |
| 64 chomp ($indel2); | |
| 65 $lineCounter2++; | |
| 66 | |
| 67 if ($lineCounter2 > $lineCounter1){ | |
| 68 #get the second indel | |
| 69 @indelArray2 = split(/\t/, $indel2); | |
| 70 | |
| 71 #check if the two indels are overlapping | |
| 72 if (($indelArray2[$indelEndColumn] >= $indelArray1[$indelStartColumn] && $indelArray2[$indelEndColumn] <= $indelArray1[$indelEndColumn]) || ($indelArray2[$indelStartColumn] >= $indelArray1[$indelStartColumn] && $indelArray2[$indelStartColumn] <= $indelArray1[$indelEndColumn])){ | |
| 73 #print ("There is an overlap between" . "\n" . $indel1 . "\n" . $indel2 . "\n"); | |
| 74 #print("The two overlapping indels are located at the lines: " . $lineCounter1 . " " . $lineCounter2 . "\n\n"); | |
| 75 | |
| 76 #break out of the loop and go back to the outerloop | |
| 77 next INDEL1; | |
| 78 } | |
| 79 else{ | |
| 80 #print("The two non-overlaapping indels are located at the lines: " . $lineCounter1 . " " . $lineCounter2 . "\n"); | |
| 81 } | |
| 82 } | |
| 83 } | |
| 84 | |
| 85 print OUTPUT $indel1 . "\n"; | |
| 86 $totalNumberofNonOverlappingIndels++; | |
| 87 } | |
| 88 | |
| 89 #print("The total number of indels is: " . $lineCounter1 . "\n"); | |
| 90 #print("The total number of non-overlapping indels is: " . $totalNumberofNonOverlappingIndels . "\n"); | |
| 91 | |
| 92 #close the input and output files | |
| 93 close(OUTPUT); | |
| 94 close(INPUT); |
