|
7
|
1 #!/usr/bin/perl
|
|
|
2
|
|
|
#
# Written by Franco Caramia
# vs2vcf does a little tidying of the VarScan VCF output
#
|
|
|
8
|
|
|
use strict;
use warnings;
use Cwd;
|
|
|
11
|
|
|
# ---------------------------------------------------------------------------
# Main program.
#
# Every command-line argument has the form KEY::VALUE. Recognised keys:
#   COMMAND::<text>  start of the command line that is executed
#   NORMAL::<text>   placed directly after COMMAND (presumably the normal
#                    sample input -- confirm against the caller)
#   TUMOR::<text>    placed after NORMAL; also handed to chromosome_order()
#   OPTION::<text>   may be repeated; values are appended, space separated,
#                    in the order given
#   OUTPUT::<path>   file written by vs2vcf()
#
# Steps:
#   1. run "<COMMAND> <NORMAL> <TUMOR> <OPTIONS>" through the shell;
#   2. append the non-header lines of output.indel.vcf to output.snp.vcf
#      (both in the current working directory);
#   3. tidy the merged file into OUTPUT, ordered by the chromosome list
#      returned by chromosome_order().
# ---------------------------------------------------------------------------
die qq(
Bad number of inputs

) if !@ARGV;

my $options     = "";
my $normal      = "";
my $command     = "";
my $tumor       = "";
my $output      = "";
my $working_dir = cwd();
my $snp         = "$working_dir/output.snp.vcf";
my $indels      = "$working_dir/output.indel.vcf";

foreach my $input (@ARGV)
{
    # Limit the split to two fields so that a value which itself contains
    # "::" is kept whole instead of being truncated at the second "::".
    my ($key, $value) = split /::/, $input, 2;

    die("Unknown Input: $input\n")
        unless defined $key && defined $value;

    # One single chain: previously COMMAND was tested in a separate "if",
    # so a COMMAND argument fell through to the final "else" and died.
    if ($key eq "COMMAND")
    {
        $command = $value;
    }
    elsif ($key eq "NORMAL")
    {
        $normal = $value;
    }
    elsif ($key eq "TUMOR")
    {
        $tumor = $value;
    }
    elsif ($key eq "OPTION")
    {
        $options = "$options $value";
    }
    elsif ($key eq "OUTPUT")
    {
        $output = $value;
    }
    else
    {
        die("Unknown Input: $input\n");
    }
}

# NOTE(review): the command line is assembled from the arguments and run
# through the shell, so the caller must only pass trusted values.
my $command_line = "$command $normal $tumor $options ";
if (system($command_line) != 0)
{
    die("Failed to execute '$command_line': $!\n") if $? == -1;
    die("Command '$command_line' exited with status " . ($? >> 8) . "\n");
}

# Append the indel records to the SNP file, skipping '#' header lines and
# any line starting with "chrom position". Done in Perl rather than with a
# shell grep pipeline so that file names are never interpreted by a shell.
open(my $snp_fh, '>>', $snp) or die "Can't append to '$snp': $!\n";
if (open(my $indel_fh, '<', $indels))
{
    while (my $line = <$indel_fh>)
    {
        next if $line =~ /^#/;
        next if $line =~ /^chrom position/;
        print {$snp_fh} $line;
    }
    close($indel_fh);
}
else
{
    # Best effort, as before: a missing indel file is reported, not fatal.
    warn "Can't open '$indels': $!\n";
}
close($snp_fh) or die "Can't close '$snp': $!\n";

my @chr_ord = chromosome_order($tumor);

vs2vcf($snp, $output, \@chr_ord);
|
|
|
62
|
|
|
63
|
|
|
# vs2vcf($input, $output, $chr_ord)
#
# Copies a tab-separated, VCF-like file from $input to $output while
# rewriting VarScan-style indel alleles.
#
# Lines starting with '#' are copied unchanged, in the order read. Lines
# containing only whitespace are skipped. For every other line, column 4 is
# taken as REF and column 5 as ALT:
#   * ALT "-XYZ" (deletion):  REF becomes REF.XYZ and ALT becomes the old REF;
#   * ALT "+XYZ" (insertion): ALT becomes REF.XYZ;
#   * anything else is left as it is.
#
# Parameters:
#   $input   - path of the file to read
#   $output  - path of the file to create (truncated if it exists)
#   $chr_ord - optional array reference of chromosome names (column 1).
#              Undefined: records are written in input order.
#              Defined:   records are written chromosome by chromosome in
#                         the listed order, sorted numerically by column 2
#                         within each chromosome. Records sharing the same
#                         chromosome and position are all kept, in input
#                         order. Chromosomes absent from the list are
#                         written afterwards, in order of first appearance,
#                         so that no record is lost.
#
# Dies if either file cannot be opened or the output cannot be closed.
sub vs2vcf
{
    my ($input, $output, $chr_ord) = @_;

    open(my $in_fh,  '<', $input)  or die "Can't open '$input': $!\n";
    open(my $out_fh, '>', $output) or die "Can't create '$output': $!\n";

    my %records_for;    # chromosome -> position -> [ record lines ]
    my @seen_chroms;    # chromosomes in order of first appearance

    while (my $line = <$in_fh>)
    {
        # Header lines go straight through, ahead of any sorted records.
        if ($line =~ /^#/)
        {
            print {$out_fh} $line;
            next;
        }
        chomp $line;
        next unless $line =~ /\S/;

        my @flds = split /\t/, $line;
        my ($ref, $alt) = @flds[3, 4];

        if (defined $alt)
        {
            if ($alt =~ /^-/)
            {
                # Deletion of bases: the deleted bases move onto REF.
                @flds[3, 4] = ($ref . substr($alt, 1), $ref);
            }
            elsif ($alt =~ /^\+/)
            {
                # Insertion of bases: the inserted bases follow REF in ALT.
                $flds[4] = $ref . substr($alt, 1);
            }
        }

        my $record = join("\t", @flds) . "\n";

        if (!defined $chr_ord)
        {
            print {$out_fh} $record;
            next;
        }

        my $chrom = $flds[0];
        my $pos   = defined $flds[1] ? $flds[1] : '';
        push @seen_chroms, $chrom unless exists $records_for{$chrom};

        # A list per position: two calls at the same site must both survive.
        push @{ $records_for{$chrom}{$pos} }, $record;
    }
    close($in_fh);

    # If a chromosome order was given, write the records in sorted order.
    if (defined $chr_ord)
    {
        my %queued;
        my @write_order = grep { !$queued{$_}++ } @{$chr_ord}, @seen_chroms;

        for my $chrom (@write_order)
        {
            my $by_pos = $records_for{$chrom} or next;
            for my $pos (sort { $a <=> $b } keys %{$by_pos})
            {
                print {$out_fh} @{ $by_pos->{$pos} };
            }
        }
    }

    # Buffered write errors only surface here, so the close is checked.
    close($out_fh) or die "Can't close '$output': $!\n";
}
|
|
|
127
|
|
|
128
|
|
|
# chromosome_order($input)
#
# Runs "samtools view -H $input" and returns the sequence names found in
# the @SQ header lines, in header order.
#
# Parameters:
#   $input - file handed to samtools (presumably a BAM/SAM file -- confirm
#            against the caller)
#
# Returns:
#   The list of SN: values, one per @SQ line. The SN: tag is located
#   anywhere on the line, so @SQ lines carrying extra tags after LN: (or
#   listing their tags in a different order) are handled. @SQ lines without
#   an SN: tag are skipped. If samtools cannot be started a warning is
#   printed and an empty list is returned.
sub chromosome_order
{
    my ($input) = @_;
    my @names;

    # List-form pipe open: samtools is executed directly, without a shell,
    # so a file name containing spaces or shell metacharacters is safe.
    if (open(my $header_fh, '-|', 'samtools', 'view', '-H', $input))
    {
        while (my $line = <$header_fh>)
        {
            next unless $line =~ /^\@SQ\t/;
            chomp $line;
            push @names, $1 if $line =~ /\tSN:([^\t]+)/;
        }
        # A non-zero samtools exit surfaces here; keep it best-effort.
        close($header_fh)
            or warn "samtools view -H '$input' did not exit cleanly\n";
    }
    else
    {
        warn "Can't run samtools on '$input': $!\n";
    }

    return(@names);
}
|
|
|
142
|
|
|
143
|