68
|
1 #!/usr/bin/perl -w
|
|
2
|
|
3 use strict;
|
|
4 use Cwd;
|
|
5 use Spreadsheet::WriteExcel;
|
|
6
|
|
7 $| = 1;
|
|
8 my $sample = shift || die "No input\n";
|
|
9 my $dir = shift || "./";
|
|
10 my $outdir = shift || "./";
|
|
11 my $pat = shift || "";
|
|
12 my $cwd = cwd();
|
|
13 (my $runName = $cwd) =~ s/^.*\/(.*?)$/$1/;
|
|
14
|
|
15 # QC
|
|
16 # Results
|
|
17 my @samples = ();
|
|
18 #open INDEX, ">$outdir/index.html";
|
|
19 my $htmlHead = qq(
|
|
20 <!DOCTYPE html>
|
|
21 <html>
|
|
22 <head>
|
|
23 <style type="text/css">
|
|
24 body {font-family:arial;}
|
|
25 table {font-family:arial;border-collapse: collapse; font-size: smaller;}
|
|
26 th {border: 1px solid gray; padding: 5px;}
|
|
27 td {border: 1px solid gray; padding: 5px; text-align: right;}
|
|
28 </style>
|
|
29 </head>
|
|
30 <body>
|
|
31 );
|
|
32 #print INDEX $htmlHead;
|
|
33 # opendir DIR, "$dir";
|
|
34 # while (my $cd = readdir DIR) {
|
|
35 # if ($cd =~ /(.*$pat)\.qc\.targets\.txt$/) {
|
|
36 # my $sam = $1;
|
|
37 # # next if ($sam =~ /R[12]/);
|
|
38 # print STDERR $1 . "\n";
|
|
39 # push @samples, $1;
|
|
40 # }
|
|
41 # }
|
|
42 # close DIR;
|
|
43
|
|
44 #print INDEX "<table>";
|
|
45 #print INDEX "<tr><th>Download:</th><th><a href=\"runQC.xls\">runQC.xls</a></th>";
|
|
46 #print INDEX "<tr><th>Sample</th><th>BAM</th><th>snp</th><th>indel</th><th>readCnt</th><th>Amp > 100</th>\n";
|
|
47 #foreach my $sam (sort @samples) {
|
|
48 # print INDEX "<tr><td><a href=$sam.html>$sam</a></td><td><a href=$sam.bam>BAM</a></td><td><a href=$sam.bam.bai>BAI</a></td></tr>\n";
|
|
49
|
|
50 #}
|
|
51 my %link = ();
|
|
52 my $excelBook0 = Spreadsheet::WriteExcel->new("$outdir/runQC.xls");
|
|
53 my $excel0 = $excelBook0->add_worksheet("table1");
|
|
54 my $excel0Ref = [[qw/sampleName runName totalReads pct100 ntbGenes/]];
|
|
55
|
|
56 print STDERR "Processing $sample\n";
|
|
57 # next if ($sample =~ /R[12]/);
|
|
58
|
|
59 my $readCnt = 0;
|
|
60 my $amp100 = 0;
|
|
61 my %ntbGenes = ();
|
|
62
|
|
63 open OUT, ">$outdir/$sample.html";
|
|
64 open OUT2, ">$outdir/$sample.tsv";
|
|
65 my $excelBook = Spreadsheet::WriteExcel->new("$outdir/$sample.xls");
|
|
66 my $excel1 = $excelBook->add_worksheet("table1");
|
|
67 my $excel2 = $excelBook->add_worksheet("table2");
|
|
68 print OUT $htmlHead;
|
|
69 my %QC = ();
|
|
70 open QC, "<$dir/$sample.qc.targets.txt";
|
|
71 readline QC;
|
|
72 print STDERR "Reading in $sample.qc.targets.txt\n";
|
|
73 while (<QC>) {
|
|
74 chomp;
|
|
75 my @row = split(/\t/, $_);
|
|
76 my @id = split(/[\_\.\-:]/, $row[0]);
|
|
77 $row[-1] = 0 if ($row[-1] eq "NA");
|
|
78 $readCnt += $row[-1]; # DP
|
|
79 if ($#id != 10) {
|
|
80 $id[0] =~ /(\D+)(\d+)/;
|
|
81 $id[0] = $2;
|
|
82 unshift @id, $1;
|
|
83 }
|
|
84
|
|
85 if ($row[-1] >= 100) {
|
|
86 $amp100++
|
|
87 }
|
|
88 else {
|
|
89 $ntbGenes{$row[0]}{dp} = $row[-1];
|
|
90 $ntbGenes{$row[0]}{id} = [@id];
|
|
91 }
|
|
92
|
|
93 $QC{$row[0]}{QC} = [@id, @row];# if ($id[0]);
|
|
94 foreach my $c ($row[4] .. $row[5]) {
|
|
95 $link{$id[-3] . ":" . $c}{$row[0]} = "Assay";
|
|
96 }
|
|
97 foreach my $c ($row[2] .. $row[4], $row[5] .. $row[3]) {
|
|
98 $link{$id[-3] . ":" . $c}{$row[0]} = "LSO";
|
|
99 }
|
|
100 }
|
|
101 close QC;
|
|
102
|
|
103 open RES, "<$dir/$sample.res.filtered.tsv" or die "Unable to open $dir/$sample\n";
|
|
104 my %uniq = ();
|
|
105 my $colCnt = 0;
|
|
106 my $resHead = readline(RES);
|
|
107 chomp $resHead;
|
|
108 $resHead =~ s/^#//;
|
|
109 $resHead =~ s/\s+$//;
|
|
110 my %resCol = map { $_ => $colCnt++ } split(/\t/, $resHead);
|
|
111 my @keyColsN = qw/QUAL Gene_Name Codon_Change Amino_Acid_change vaf DP AD Tag CHROM POS ID REF ALT Context Effect_Impact Functional_Class Amino_Acid_length Gene_Coding Transcript_ID Exon_Rank/;
|
|
112 # my @keyColsN = qw/CHROM POS ID REF ALT QUAL DP AD vaf Context Effect_Impact Functional_Class Codon_Change Amino_Acid_change Amino_Acid_length Gene_Name Coding Transcript Exon Tag/;
|
|
113 my @keyColsI = map { $resCol{$_} } @keyColsN;
|
|
114 foreach my $i (0 .. $#keyColsN) {
|
|
115 print STDERR join(":", $i, $keyColsN[$i], $keyColsI[$i]) . "\n";
|
|
116 }
|
|
117
|
|
118 print STDERR "Processing results\n";
|
|
119 while (<RES>) {
|
|
120 chomp;
|
|
121 my @row = split(/\t/, $_);
|
|
122 my $cpos = join(":", @row[0, 1]);
|
|
123 if (exists $link{$cpos}) {
|
|
124 # my $key = join(":", @row[0 .. 4,6, 9,10,11,12,14 .. 21]);
|
|
125 my $key = join(":", @row[@keyColsI]);
|
|
126 #print STDERR join(":", @keyColsI) . "\n";
|
|
127 # print STDERR join(":", @row) . "\n"; sleep 1;
|
|
128 if (not exists($uniq{$key})) {
|
|
129 foreach my $locus (keys(%{$link{$cpos}})) {
|
|
130 next if ($link{$cpos}{$locus} eq "LSO");
|
|
131 push @{$QC{$locus}{RES}}, [@row, $link{$cpos}{$locus}];
|
|
132 # print STDERR "Adding $key to $locus\n\n"; #sleep 1;
|
|
133 }
|
|
134 $uniq{$key} = 0;
|
|
135 }
|
|
136 else {
|
|
137 # print STDERR $key . " : Exists\n\n"; #sleep 1;
|
|
138 }
|
|
139 }
|
|
140 }
|
|
141 close RES;
|
|
142
|
|
143 ## Rplots that are not self-explanatory enough
|
|
144 #print OUT "<img src=\"$sample/$sample.vaf.png\">";
|
|
145 #print OUT "<img src=\"$sample/$sample.raf.png\">";
|
|
146 #print OUT "<img src=\"$sample/$sample.snv-q.png\">";
|
|
147 #print OUT "<img src=\"$sample/$sample.snv-q-zoom.png\">";
|
|
148 #print OUT "<img src=\"$sample/$sample.ins-q.png\">";
|
|
149 #print OUT "<img src=\"$sample/$sample.del-q.png\">";
|
|
150
|
|
151 print OUT "<img src=\"$sample/$sample.amp-dp.png\">";
|
|
152 #print OUT "<img src=\"$sample/$sample.heat.png\">";
|
|
153 #print OUT "<img src=\"$sample/$sample.bias.png\">";
|
|
154 #print OUT "<img src=\"$sample/$sample.biasheat.png\">";
|
|
155 #print OUT "<img src=\"$sample/$sample.vafcut.png\">";
|
|
156 print OUT "<table border=1><tr><th>Download:</th><th><a href=\"$sample.tsv\">TSV</a></th>";
|
|
157 print OUT "<th><a href=\"$sample.xls\">XLS</a></th></table>";
|
|
158 print OUT "<table border=1>\n";
|
|
159 # my @colnames = qw/depth chr pos id ref var qual DP AD vaf context impact effectClass codonChange AAChange RefSeqLength geneName RefSeqClass RefSeqID Exon Tag/;
|
|
160 my @colnames = ("depth", @keyColsN);
|
|
161 print OUT "<tr>" . join("", map { "<th>$_</th>" } ("Amplicon", "c","c2", "b", @colnames)) . "</tr>";
|
|
162 print OUT2 join("\t", "Amplicon", @colnames) . "\n";
|
|
163 my $excelAref = [["Amplicon", @colnames]];
|
|
164 my $excelAref2 = [[qw/gene exon protein vaf func depth /]];
|
|
165 my %rescnt = ();
|
|
166 foreach my $locus (sort keys(%QC)) {
|
|
167 # my @targets = keys(%{$QC{$locus}{QC}});
|
|
168 # print OUT "</td>";
|
|
169 my $nres = 1;
|
|
170 $nres = scalar(@{$QC{$locus}{RES}}) if ($QC{$locus}{RES});
|
|
171
|
|
172 print OUT "<tr><td rowspan=\"$nres\">$locus</td>\n";
|
|
173 print OUT "<td rowspan=\"$nres\"><a href=\"$sample/$sample.$locus.cov.png\">c</a></td>\n";
|
|
174 print OUT "<td rowspan=\"$nres\"><a href=\"$sample/$sample.$locus.cov2.png\">c2</a></td>\n";
|
|
175 print OUT "<td rowspan=\"$nres\"><a href=\"$sample/$sample.$locus.bias.png\">b</a></td>\n";
|
|
176 print OUT "<td rowspan=\"$nres\">$QC{$locus}{QC}->[-1]</td>\n"; #<td rowspan=\"$nres\">";
|
|
177
|
|
178 foreach my $res (@{$QC{$locus}{RES}}) {
|
|
179 $res = [map {$_ || ""} @$res];
|
|
180 print OUT "<td>\n";
|
|
181 print OUT join("</td><td>\n", @{$res}[@keyColsI]);
|
|
182 print OUT "</td></tr><tr>\n";
|
|
183 print OUT2 join("\t", $locus, $QC{$locus}{QC}->[-1], @{$res}[@keyColsI]) . "\n";
|
|
184 push @$excelAref, [$locus, $QC{$locus}{QC}->[-1], @{$res}[@keyColsI]];
|
|
185
|
|
186 # qw/QUAL Gene_Name Codon_Change Amino_Acid_change vaf DP AD Tag CHROM POS ID REF ALT Context Effect_Impact Functional_Class Amino_Acid_length Coding Transcript Exon/;
|
|
187 push @$excelAref2, [map {$_ || "NA"} @{$res}[map { $resCol{$_} } (qw/Gene_Name Exon_Rank Amino_Acid_change vaf Functional_Class/)], $QC{$locus}{QC}->[-1]];
|
|
188 # push @$excelAref2, [map {$_ || "NA"} @{$res}[[qw/Gene_Name Exon_Rank Cdna_change Amino_Acid_change vaf Functional_Class/]], $QC{$locus}{QC}->[-1]];
|
|
189 my $pl = $res->[$keyColsI[7]];
|
|
190 my $ref = $res->[$keyColsI[11]];
|
|
191 my $var = $res->[$keyColsI[12]];
|
|
192 # print STDERR "$pl $ref $var\n"; sleep 1;
|
|
193 if (length($ref) == length($var)) {
|
|
194 $rescnt{$pl . "snp"}++;
|
|
195 }
|
|
196 else {
|
|
197 $rescnt{$pl . "indel"}++;
|
|
198 }
|
|
199 # $rescnt++;
|
|
200 }
|
|
201 print STDERR $locus . ":" . $nres . "\n";
|
|
202 print STDERR $locus . ":" . join("-", @{$QC{$locus}{RES}}) . "\n";
|
|
203 if (scalar(@{$QC{$locus}{RES}}) == 0) { #$nres == 0) {
|
|
204 print OUT2 join("\t", $locus, $QC{$locus}{QC}->[-1], ("-") x scalar(@keyColsI)) . "\n";
|
|
205 push @$excelAref, [$locus, $QC{$locus}{QC}->[-1], ("-") x scalar(@keyColsI)];
|
|
206 }
|
|
207 print OUT "</tr>\n";
|
|
208 }
|
|
209 print OUT "</table>\n";
|
|
210 print OUT "</body></html>\n";
|
|
211 close OUT;
|
|
212 close OUT2;
|
|
213 $excel1->write_col(0,0, $excelAref);
|
|
214 $excel2->write_col(0,0, $excelAref2);
|
|
215 $excelBook->close();
|
|
216 my @resCnt = map { $rescnt{$_} || 0 } qw/Falcosnp Falcoindel/;
|
|
217 my $pctGood = sprintf("%.2f", $amp100 / scalar(keys(%QC)) * 100);
|
|
218 #print INDEX "<tr><td><a href=$sample.html>$sample</a></td><td><a href=$sample.bam>BAM</a></td><td>$resCnt[0]</td><td>$resCnt[1]</td><td>$readCnt</td><td>$pctGood</td></tr>\n";
|
|
219 my $ntbsAmps = join(",", map { s/^(.*?)\.chr.*$/$1/; $_; } keys(%ntbGenes));
|
|
220 push @{$excel0Ref}, [$sample, $runName, $readCnt, $pctGood, $ntbsAmps];
|
|
221
|
|
222 $excel0->write_col(0,0, $excel0Ref);
|
|
223 $excelBook0->close();
|
|
224
|
|
225 #print INDEX "</table>";
|
|
226 #print INDEX "<img src=\"alnStats.png\">";
|
|
227 #print INDEX "<img src=\"errStats.png\">";
|
|
228 #print INDEX "<img src=\"qualStats.png\">";
|
|
229 #print INDEX "</html>";
|
|
230 #close INDEX;
|
|
231
|
|
232
|