Mercurial > repos > stef > falco
view falco/lib/perl/mkHtmlReportGalaxy.pl @ 80:ebb4c3b8b9db draft
Uploaded
author | stef |
---|---|
date | Fri, 20 Mar 2015 08:58:17 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl -w use strict; use Cwd; use Spreadsheet::WriteExcel; $| = 1; my $sample = shift || die "Missing input\n"; my $qc_targets_txt = shift || die "Missing input\n"; my $dir = shift || "./"; my $outdir = shift || "./"; my $pat = shift || ""; my $cwd = cwd(); (my $runName = $cwd) =~ s/^.*\/(.*?)$/$1/; # QC # Results my @samples = (); my $htmlHead = qq( <!DOCTYPE html> <html> <head> <style type="text/css"> body {font-family:arial;} table {font-family:arial;border-collapse: collapse; font-size: smaller;} th {border: 1px solid gray; padding: 5px;} td {border: 1px solid gray; padding: 5px; text-align: right;} </style> </head> <body> ); my %link = (); my $excelBook0 = Spreadsheet::WriteExcel->new("$outdir/runQC.xls"); my $excel0 = $excelBook0->add_worksheet("table1"); my $excel0Ref = [[qw/sampleName runName totalReads pct100 ntbGenes/]]; print STDERR "Processing $sample\n"; # next if ($sample =~ /R[12]/); my $readCnt = 0; my $amp100 = 0; my %ntbGenes = (); open OUT, ">$outdir/$sample.html"; open OUT2, ">$outdir/$sample.tsv"; my $excelBook = Spreadsheet::WriteExcel->new("$outdir/$sample.xls"); my $excel1 = $excelBook->add_worksheet("table1"); my $excel2 = $excelBook->add_worksheet("table2"); print OUT $htmlHead; my %QC = (); open QC, "<$qc_targets_txt" or die "Unable to open qc_targets_txt\n"; readline QC; print STDERR "Reading in $qc_targets_txt\n"; while (<QC>) { chomp; my @row = split(/\t/, $_); my @id = split(/[\_\.\-:]/, $row[0]); $row[-1] = 0 if ($row[-1] eq "NA"); $readCnt += $row[-1]; # DP if ($#id != 10) { $id[0] =~ /(\D+)(\d+)/; $id[0] = $2; unshift @id, $1; } if ($row[-1] >= 100) { $amp100++ } else { $ntbGenes{$row[0]}{dp} = $row[-1]; $ntbGenes{$row[0]}{id} = [@id]; } $QC{$row[0]}{QC} = [@id, @row];# if ($id[0]); foreach my $c ($row[4] .. $row[5]) { $link{$id[-3] . ":" . $c}{$row[0]} = "Assay"; } foreach my $c ($row[2] .. $row[4], $row[5] .. $row[3]) { $link{$id[-3] . ":" . $c}{$row[0]} = "LSO"; } } close QC; open RES, "<$dir/$sample.res.filtered.tsv" or die "Unable to open $dir/$sample\n"; my %uniq = (); my $colCnt = 0; my $resHead = readline(RES); chomp $resHead; $resHead =~ s/^#//; $resHead =~ s/\s+$//; my %resCol = map { $_ => $colCnt++ } split(/\t/, $resHead); my @keyColsN = qw/QUAL Gene_Name Codon_Change Amino_Acid_change vaf DP AD Tag CHROM POS ID REF ALT Context Effect_Impact Functional_Class Amino_Acid_length Gene_Coding Transcript_ID Exon_Rank/; # my @keyColsN = qw/CHROM POS ID REF ALT QUAL DP AD vaf Context Effect_Impact Functional_Class Codon_Change Amino_Acid_change Amino_Acid_length Gene_Name Coding Transcript Exon Tag/; my @keyColsI = map { $resCol{$_} } @keyColsN; foreach my $i (0 .. $#keyColsN) { print STDERR join(":", $i, $keyColsN[$i], $keyColsI[$i]) . "\n"; } print STDERR "Processing results\n"; while (<RES>) { chomp; my @row = split(/\t/, $_); my $cpos = join(":", @row[0, 1]); if (exists $link{$cpos}) { # my $key = join(":", @row[0 .. 4,6, 9,10,11,12,14 .. 21]); my $key = join(":", @row[@keyColsI]); #print STDERR join(":", @keyColsI) . "\n"; # print STDERR join(":", @row) . "\n"; sleep 1; if (not exists($uniq{$key})) { foreach my $locus (keys(%{$link{$cpos}})) { next if ($link{$cpos}{$locus} eq "LSO"); push @{$QC{$locus}{RES}}, [@row, $link{$cpos}{$locus}]; # print STDERR "Adding $key to $locus\n\n"; #sleep 1; } $uniq{$key} = 0; } else { # print STDERR $key . " : Exists\n\n"; #sleep 1; } } } close RES; ## Rplots that are not self-explanatory enough #print OUT "<img src=\"$sample/$sample.vaf.png\">"; #print OUT "<img src=\"$sample/$sample.raf.png\">"; #print OUT "<img src=\"$sample/$sample.snv-q.png\">"; #print OUT "<img src=\"$sample/$sample.snv-q-zoom.png\">"; #print OUT "<img src=\"$sample/$sample.ins-q.png\">"; #print OUT "<img src=\"$sample/$sample.del-q.png\">"; print OUT "<img src=\"$sample/$sample.amp-dp.png\">"; #print OUT "<img src=\"$sample/$sample.heat.png\">"; #print OUT "<img src=\"$sample/$sample.bias.png\">"; #print OUT "<img src=\"$sample/$sample.biasheat.png\">"; #print OUT "<img src=\"$sample/$sample.vafcut.png\">"; print OUT "<table border=1><tr><th>Download:</th><th><a href=\"$sample.tsv\">TSV</a></th>"; print OUT "<th><a href=\"$sample.xls\">XLS</a></th></table>"; print OUT "<table border=1>\n"; # my @colnames = qw/depth chr pos id ref var qual DP AD vaf context impact effectClass codonChange AAChange RefSeqLength geneName RefSeqClass RefSeqID Exon Tag/; my @colnames = ("depth", @keyColsN); print OUT "<tr>" . join("", map { "<th>$_</th>" } ("Amplicon", "c","c2", "b", @colnames)) . "</tr>"; print OUT2 join("\t", "Amplicon", @colnames) . "\n"; my $excelAref = [["Amplicon", @colnames]]; my $excelAref2 = [[qw/gene exon protein vaf func depth /]]; my %rescnt = (); foreach my $locus (sort keys(%QC)) { # my @targets = keys(%{$QC{$locus}{QC}}); # print OUT "</td>"; my $nres = 1; $nres = scalar(@{$QC{$locus}{RES}}) if ($QC{$locus}{RES}); print OUT "<tr><td rowspan=\"$nres\">$locus</td>\n"; print OUT "<td rowspan=\"$nres\"><a href=\"$sample/$sample.$locus.cov.png\">c</a></td>\n"; print OUT "<td rowspan=\"$nres\"><a href=\"$sample/$sample.$locus.cov2.png\">c2</a></td>\n"; print OUT "<td rowspan=\"$nres\"><a href=\"$sample/$sample.$locus.bias.png\">b</a></td>\n"; print OUT "<td rowspan=\"$nres\">$QC{$locus}{QC}->[-1]</td>\n"; #<td rowspan=\"$nres\">"; foreach my $res (@{$QC{$locus}{RES}}) { $res = [map {$_ || ""} @$res]; print OUT "<td>\n"; print OUT join("</td><td>\n", @{$res}[@keyColsI]); print OUT "</td></tr><tr>\n"; print OUT2 join("\t", $locus, $QC{$locus}{QC}->[-1], @{$res}[@keyColsI]) . "\n"; push @$excelAref, [$locus, $QC{$locus}{QC}->[-1], @{$res}[@keyColsI]]; # qw/QUAL Gene_Name Codon_Change Amino_Acid_change vaf DP AD Tag CHROM POS ID REF ALT Context Effect_Impact Functional_Class Amino_Acid_length Coding Transcript Exon/; push @$excelAref2, [map {$_ || "NA"} @{$res}[map { $resCol{$_} } (qw/Gene_Name Exon_Rank Amino_Acid_change vaf Functional_Class/)], $QC{$locus}{QC}->[-1]]; # push @$excelAref2, [map {$_ || "NA"} @{$res}[[qw/Gene_Name Exon_Rank Cdna_change Amino_Acid_change vaf Functional_Class/]], $QC{$locus}{QC}->[-1]]; my $pl = $res->[$keyColsI[7]]; my $ref = $res->[$keyColsI[11]]; my $var = $res->[$keyColsI[12]]; # print STDERR "$pl $ref $var\n"; sleep 1; if (length($ref) == length($var)) { $rescnt{$pl . "snp"}++; } else { $rescnt{$pl . "indel"}++; } # $rescnt++; } print STDERR $locus . ":" . $nres . "\n"; print STDERR $locus . ":" . join("-", @{$QC{$locus}{RES}}) . "\n"; if (scalar(@{$QC{$locus}{RES}}) == 0) { #$nres == 0) { print OUT2 join("\t", $locus, $QC{$locus}{QC}->[-1], ("-") x scalar(@keyColsI)) . "\n"; push @$excelAref, [$locus, $QC{$locus}{QC}->[-1], ("-") x scalar(@keyColsI)]; } print OUT "</tr>\n"; } print OUT "</table>\n"; print OUT "</body></html>\n"; close OUT; close OUT2; $excel1->write_col(0,0, $excelAref); $excel2->write_col(0,0, $excelAref2); $excelBook->close(); my @resCnt = map { $rescnt{$_} || 0 } qw/Falcosnp Falcoindel/; my $pctGood = sprintf("%.2f", $amp100 / scalar(keys(%QC)) * 100); my $ntbsAmps = join(",", map { s/^(.*?)\.chr.*$/$1/; $_; } keys(%ntbGenes)); push @{$excel0Ref}, [$sample, $runName, $readCnt, $pctGood, $ntbsAmps]; $excel0->write_col(0,0, $excel0Ref); $excelBook0->close();