annotate varscan_somatic.pl @ 7:c6393229c38b draft

Uploaded
author fcaramia
date Fri, 14 Jun 2013 00:44:41 -0400
parents
children e032d60cae55
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
c6393229c38b Uploaded
fcaramia
parents:
diff changeset
#!/usr/bin/perl

# Written by Franco Caramia
#
# Wrapper around a VarScan "somatic" run. It:
#   1. runs the VarScan command given on the command line,
#   2. appends the indel calls to the SNP calls,
#   3. tidies the merged file into VCF-style REF/ALT columns (vs2vcf),
#      ordered like the @SQ lines of the tumor BAM header.
#
# Every argument has the form KEY::VALUE, where KEY is one of
#   COMMAND  - the VarScan command line prefix to execute
#   NORMAL   - normal sample input, passed to the command first
#   TUMOR    - tumor sample input, passed second; also given to samtools
#              to read the chromosome order
#   OPTION   - extra command-line option (may be repeated, order kept)
#   OUTPUT   - path of the final, tidied VCF

use strict;
use warnings;
use Cwd;

die qq(
Bad numbr of inputs

) if(!@ARGV);

my $options = "";
my $normal  = "";
my $command = "";
my $tumor   = "";
my $output  = "";
my $working_dir = cwd();
# VarScan is expected to write these two files into the working directory.
my $snp    = "$working_dir/output.snp.vcf";
my $indels = "$working_dir/output.indel.vcf";

foreach my $input (@ARGV) {
    # Limit the split to two parts so a value that itself contains "::"
    # is kept intact instead of being truncated.
    my ($key, $value) = split /::/, $input, 2;
    $key   = "" unless defined $key;
    $value = "" unless defined $value;

    # One single if/elsif chain: COMMAND used to sit in a separate "if",
    # so it always fell through to the "Unknown Input" branch below.
    if ($key eq "COMMAND") {
        $command = $value;
    }
    elsif ($key eq "NORMAL") {
        $normal = $value;
    }
    elsif ($key eq "TUMOR") {
        $tumor = $value;
    }
    elsif ($key eq "OPTION") {
        $options = "$options $value";
    }
    elsif ($key eq "OUTPUT") {
        $output = $value;
    }
    else {
        die("Unknown Input: $input\n");
    }
}

die "Missing COMMAND:: argument\n" if $command eq "";
die "Missing TUMOR:: argument\n"   if $tumor   eq "";
die "Missing OUTPUT:: argument\n"  if $output  eq "";

# NOTE(review): the values are interpolated into a shell command line as-is,
# because COMMAND is itself a multi-word command. The caller must quote them.
system("$command $normal $tumor $options ") == 0
    or die "Command failed (wait status $?): $command\n";

# Append the indel records to the SNP file, skipping '#' header lines and the
# "chrom position" column header (same filter the former grep pipeline used).
# The SNP file is opened first so it exists even when there is no indel file.
open(my $snp_fh, '>>', $snp) or die "Can't append to '$snp': $!\n";
if (open(my $indel_fh, '<', $indels)) {
    while (my $line = <$indel_fh>) {
        next if $line =~ /^#/;
        next if $line =~ /^chrom position/;
        print {$snp_fh} $line;
    }
    close($indel_fh);
}
else {
    warn "Can't open '$indels': $!\n";
}
close($snp_fh) or die "Can't finish writing '$snp': $!\n";

my @chr_ord = chromosome_order($tumor);

# With no chromosome order available (e.g. samtools failed), pass undef so the
# records are written in input order rather than being filtered out.
vs2vcf($snp, $output, @chr_ord ? \@chr_ord : undef);


# vs2vcf($input, $output, $chr_ord)
#
# Tidies VarScan output into VCF-style REF/ALT columns.
#
#   $input   - path of the tab-separated file to read
#   $output  - path of the file to create (overwritten if it exists)
#   $chr_ord - optional array reference of chromosome names giving the
#              desired output order; undef means "keep input order"
#
# Lines starting with '#' are copied through as soon as they are read.
# For every other line, column 4 is taken as REF and column 5 as ALT:
#   ALT "-XYZ" (deletion)  becomes REF = REF.XYZ, ALT = REF
#   ALT "+XYZ" (insertion) becomes ALT = REF.XYZ
#
# When $chr_ord is given, records are written chromosome by chromosome in
# that order, sorted numerically by column 2 within a chromosome. Records at
# the same position keep their input order. Chromosomes missing from
# $chr_ord are written afterwards, in order of first appearance, so no
# record is lost.
#
# Dies if either file cannot be opened or the output cannot be flushed.
sub vs2vcf {
    my $input   = shift;
    my $output  = shift;
    my $chr_ord = shift;

    open(my $in_fh,  '<', $input)  or die "Can't open '$input': $!\n";
    open(my $out_fh, '>', $output) or die "Can't create '$output': $!\n";

    my %records;        # chromosome -> position -> [ output lines ]
    my @seen_chroms;    # chromosomes in order of first appearance

    while (my $line = <$in_fh>) {
        if ($line =~ /^#/) {
            print {$out_fh} $line;
            next;
        }
        chomp $line;
        next if $line !~ /\S/;    # ignore blank lines

        # LIMIT -1 keeps trailing empty columns, which split drops by default.
        my @flds = split /\t/, $line, -1;
        my $ref = defined $flds[3] ? $flds[3] : '';
        my $alt = defined $flds[4] ? $flds[4] : '';

        if ($alt =~ /^-/) {
            # Deletion of bases
            ($flds[3], $flds[4]) = ($ref . substr($alt, 1), $ref);
        }
        elsif ($alt =~ /^\+/) {
            # Insertion of bases
            $flds[4] = $ref . substr($alt, 1);
        }

        my $record = join("\t", @flds) . "\n";

        if (!defined $chr_ord) {
            print {$out_fh} $record;
            next;
        }

        my $chrom = $flds[0];
        my $pos   = defined $flds[1] ? $flds[1] : 0;
        push @seen_chroms, $chrom unless exists $records{$chrom};
        # A list per position: a SNP and an indel can share a position, and
        # a plain scalar here would let the later one overwrite the earlier.
        push @{ $records{$chrom}{$pos} }, $record;
    }
    close($in_fh);

    if (defined $chr_ord) {
        my %emitted;
        for my $chrom (@{$chr_ord}, @seen_chroms) {
            next if $emitted{$chrom}++;
            next unless exists $records{$chrom};
            for my $pos (sort { $a <=> $b } keys %{ $records{$chrom} }) {
                print {$out_fh} @{ $records{$chrom}{$pos} };
            }
        }
    }

    # Buffered write errors only surface at close time.
    close($out_fh) or die "Can't finish writing '$output': $!\n";
    return;
}


# chromosome_order($input)
#
# Returns the reference sequence names of an alignment file in header order.
#
#   $input - path handed to "samtools view -H"
#
# Runs "samtools view -H $input" and returns the SN: value of every @SQ
# header line, in the order the lines appear. The SN tag is picked out by
# name, so @SQ lines carrying extra tags (M5, UR, AS, ...) or a different tag
# order are handled as well. @SQ lines without an SN tag are skipped.
#
# Returns an empty list, after a warning, if samtools cannot be started.
# A non-zero samtools exit status only triggers a warning.
sub chromosome_order {
    my $input = shift;

    # List-form pipe open: no shell is involved, so a path containing
    # spaces or shell metacharacters is passed to samtools unchanged.
    my $header_fh;
    if (!open($header_fh, '-|', 'samtools', 'view', '-H', $input)) {
        warn "Can't run samtools on '$input': $!\n";
        return ();
    }

    my @names;
    while (my $line = <$header_fh>) {
        chomp $line;
        next unless $line =~ /^\@SQ\t/;
        push @names, $1 if $line =~ /\tSN:([^\t]+)/;
    }

    # For a pipe, close also reports the exit status of the child process.
    close($header_fh)
        or warn "samtools view -H '$input' did not exit cleanly (status $?)\n";

    return @names;
}