";
-
-
- print $fh_out "
G2P report ";
- print $fh_out "
Input and output files:
";
-
- print $fh_out "
";
- print $fh_out "G2P list ";
- print $fh_out "" . $self->{user_params}->{file} . " ";
- print $fh_out "Log directory ";
- print $fh_out "" . $self->{user_params}->{log_dir} . " ";
- print $fh_out "HTML report ";
- print $fh_out "" . $self->{user_params}->{html_report} . " ";
- print $fh_out "TXT report ";
- print $fh_out "" . $self->{user_params}->{txt_report} . " ";
- print $fh_out " ";
-
- print $fh_out "
Counts:
";
- print $fh_out "
";
- print $fh_out "$count_g2p_genes ";
- print $fh_out "G2P genes ";
- print $fh_out "$count_in_vcf_file ";
- print $fh_out "G2P genes in input VCF file ";
- print $fh_out "$count_complete_genes ";
- print $fh_out "G2P complete genes in input VCF file ";
- print $fh_out " ";
-
-
- print $fh_out "
Summary of G2P complete genes per individual ";
- print $fh_out "
G2P complete gene: A sufficient number of variant hits for the observed allelic requirement in at least one of the gene's transcripts. Variants are filtered by frequency.
";
- print $fh_out "
Frequency thresholds and number of required variant hits for each allelic requirement:
";
-
- print $fh_out "
";
- print $fh_out "";
- print $fh_out "Allelic requirement Frequency threshold for filtering Variant counts by zygosity ";
- print $fh_out " ";
- print $fh_out "";
- foreach my $ar (sort keys %$allelic_requirements) {
- my $af = $allelic_requirements->{$ar}->{af};
- my $rules = $allelic_requirements->{$ar}->{rules};
- my $rule = join(' OR ', map {"$_ >= $rules->{$_}"} keys %$rules);
- print $fh_out "$ar $af $rule ";
- }
- print $fh_out " ";
- print $fh_out "
";
-
-my $switch =<
-
-
- Show only canonical transcript
-
-
-
-SHTML
-
- print $fh_out $switch;
-
- foreach my $individual (sort keys %$chart_data) {
- foreach my $gene_symbol (keys %{$chart_data->{$individual}}) {
- foreach my $ar (keys %{$chart_data->{$individual}->{$gene_symbol}}) {
- print $fh_out "\n";
- }
- }
- }
-
- foreach my $individual (sort keys %$chart_data) {
- foreach my $gene_symbol (keys %{$chart_data->{$individual}}) {
- foreach my $ar (keys %{$chart_data->{$individual}->{$gene_symbol}}) {
- foreach my $transcript_stable_id (keys %{$chart_data->{$individual}->{$gene_symbol}->{$ar}}) {
- my $class = ($canonical_transcripts->{$transcript_stable_id}) ? 'is_canonical' : 'not_canonical';
- print $fh_out "";
- my $name = "$individual\_$gene_symbol\_$ar\_$transcript_stable_id";
- my $title = "$individual > $gene_symbol > $ar > $transcript_stable_id";
- print $fh_out "
$title \n";
- print $fh_out "
\n";
- print $fh_out "
";
- print $fh_out "\n";
- print $fh_out "" . join('', map {"$_ "} @new_header) . " \n";
- print $fh_out " \n";
- print $fh_out "\n";
- foreach my $vf_data (@{$chart_data->{$individual}->{$gene_symbol}->{$ar}->{$transcript_stable_id}}) {
- my $data_row = $vf_data->[0];
- my @tds = ();
- foreach my $cell (@$data_row) {
- my $value = $cell->[0];
- my $class = $cell->[1];
- if ($class) {
- push @tds, "$value ";
- } else {
- push @tds, "$value ";
- }
- }
- print $fh_out "", join('', @tds), " \n";
- }
- print $fh_out " \n";
- print $fh_out "
\n";
- print $fh_out "
\n";
- print $fh_out "
\n";
- }
- }
- }
- }
- print $fh_out stats_html_tail();
-}
-
-sub chart_and_txt_data {
- my $self = shift;
- my $result_summary = shift;
- my $individuals = $result_summary->{individuals};
- my $complete_genes = $result_summary->{complete_genes};
- my $acting_ars = $result_summary->{acting_ars};
- my $new_order = $result_summary->{new_order};
-
-# my @frequencies_header = sort keys $af_key_2_population_name;
- my @frequencies_header = sort @{$self->{user_params}->{af_keys}};
-
- my $assembly = $self->{config}->{assembly};
- my $transcripts = {};
- my $canonical_transcripts = {};
- my $transcript_adaptor = $self->{config}->{ta};
- my $chart_data = {};
- my $txt_output_data = {};
-
- my $prediction2bgcolor = {
- 'probably damaging' => 'danger',
- 'deleterious' => 'danger',
- 'possibly damaging' => 'warning',
- 'unknown' => 'warning',
- 'benign' => 'success',
- 'tolerated' => 'success',
- };
-
- foreach my $individual (sort keys %$new_order) {
- foreach my $gene_symbol (keys %{$new_order->{$individual}}) {
- foreach my $ar (keys %{$new_order->{$individual}->{$gene_symbol}}) {
- foreach my $transcript_stable_id (keys %{$new_order->{$individual}->{$gene_symbol}->{$ar}}) {
- foreach my $vf_name (keys %{$new_order->{$individual}->{$gene_symbol}->{$ar}->{$transcript_stable_id}}) {
- my $data = $individuals->{$individual}->{$gene_symbol}->{$vf_name}->{$transcript_stable_id};
-
- my $hash = {};
- foreach my $pair (split/;/, $data) {
- my ($key, $value) = split('=', $pair, 2);
- $value ||= '';
- $hash->{$key} = $value;
- }
- my $vf_location = $hash->{vf_location};
- my $existing_name = $hash->{existing_name};
- if ($existing_name ne 'NA') {
- $existing_name = "$existing_name ";
- }
- my $refseq = $hash->{refseq};
- my $failed = $hash->{failed};
- my $clin_sign = $hash->{clin_sig};
- my $novel = $hash->{novel};
- my $hgvs_t = $hash->{hgvs_t};
- my $hgvs_p = $hash->{hgvs_p};
- my $allelic_requirement = $hash->{allele_requirement};
- my $observed_allelic_requirement = $hash->{ar_in_g2pdb};
- my $consequence_types = $hash->{consequence_types};
- my $zygosity = $hash->{zyg};
- my $sift_score = $hash->{sift_score} || '0.0';
- my $sift_prediction = $hash->{sift_prediction};
- my $sift = 'NA';
- my $sift_class = '';
- if ($sift_prediction ne 'NA') {
- $sift = "$sift_prediction(" . "$sift_score)";
- $sift_class = $prediction2bgcolor->{$sift_prediction};
- }
- my $polyphen_score = $hash->{polyphen_score} || '0.0';
- my $polyphen_prediction = $hash->{polyphen_prediction};
- my $polyphen = 'NA';
- my $polyphen_class = '';
- if ($polyphen_prediction ne 'NA') {
- $polyphen = "$polyphen_prediction($polyphen_score)";
- $polyphen_class = $prediction2bgcolor->{$polyphen_prediction};
- }
-
- my %frequencies_hash = ();
- if ($hash->{frequencies} ne 'NA') {
- %frequencies_hash = split /[,=]/, $hash->{frequencies};
- }
- my @frequencies = ();
- my @txt_output_frequencies = ();
- foreach my $population (@frequencies_header) {
- my $frequency = $frequencies_hash{$population} || '';
- push @frequencies, ["$frequency"];
- if ($frequency) {
- push @txt_output_frequencies, "$population=$frequency";
- }
- }
- my $is_canonical = 0;
- if ($hash->{is_canonical}) {
- $is_canonical = ($hash->{is_canonical} eq 'yes') ? 1 : 0;
- } else {
- if ($transcripts->{$transcript_stable_id}) {
- $is_canonical = 1 if ($canonical_transcripts->{$transcript_stable_id});
- } else {
- my $transcript = $transcript_adaptor->fetch_by_stable_id($transcript_stable_id);
- if ($transcript) {
- $is_canonical = $transcript->is_canonical();
- $transcripts->{$transcript_stable_id} = 1;
- $canonical_transcripts->{$transcript_stable_id} = 1 if ($is_canonical);
- }
- }
- }
- my ($location, $alleles) = split(' ', $vf_location);
- $location =~ s/\-/:/;
- $alleles =~ s/\//:/;
-
- push @{$chart_data->{$individual}->{$gene_symbol}->{$ar}->{$transcript_stable_id}}, [[
- [$vf_location],
- [$vf_name],
- [$existing_name],
- [$zygosity],
- [$observed_allelic_requirement],
- [$consequence_types],
- [$clin_sign],
- [$sift, $sift_class],
- [$polyphen, $polyphen_class],
- [$novel],
- [$failed],
- @frequencies,
- [$hgvs_t],
- [$hgvs_p],
- [$refseq]
- ], $is_canonical];
-
- my $txt_output_variant = "$location:$alleles:$zygosity:$consequence_types:SIFT=$sift:PolyPhen=$polyphen";
- if (@txt_output_frequencies) {
- $txt_output_variant .= ':' . join(',', @txt_output_frequencies);
- }
- $txt_output_data->{$individual}->{$gene_symbol}->{$ar}->{$transcript_stable_id}->{is_canonical} = $is_canonical;
- $txt_output_data->{$individual}->{$gene_symbol}->{$ar}->{$transcript_stable_id}->{REQ} = $observed_allelic_requirement;
- push @{$txt_output_data->{$individual}->{$gene_symbol}->{$ar}->{$transcript_stable_id}->{variants}}, $txt_output_variant;
- }
- }
- }
- }
- }
- return {txt_data => $txt_output_data, chart_data => $chart_data, canonical_transcripts => $canonical_transcripts};
-}
-
-sub parse_log_files {
- my $self = shift;
-
- my $log_dir = $self->{user_params}->{log_dir};
- my @files = <$log_dir/*>;
-
- my $genes = {};
- my $individuals = {};
- my $complete_genes = {};
- my $g2p_list = {};
- my $in_vcf_file = {};
- my $cache = {};
- my $acting_ars = {};
-
- my $new_order = {};
-
- foreach my $file (@files) {
- my $fh = FileHandle->new($file, 'r');
- while (<$fh>) {
- chomp;
- if (/^G2P_list/) {
- my ($flag, $gene_symbol, $DDD_category) = split/\t/;
- $g2p_list->{$gene_symbol} = 1;
- } elsif (/^G2P_in_vcf/) {
- my ($flag, $gene_symbol) = split/\t/;
- $in_vcf_file->{$gene_symbol} = 1;
- } elsif (/^G2P_complete/) {
- my ($flag, $gene_symbol, $tr_stable_id, $individual, $vf_name, $ars, $zyg) = split/\t/;
- foreach my $ar (split(',', $ars)) {
- if ($ar eq 'biallelic') {
- # homozygous, report complete
- if (uc($zyg) eq 'HOM') {
- $complete_genes->{$gene_symbol}->{$individual}->{$tr_stable_id} = 1;
- $acting_ars->{$gene_symbol}->{$individual}->{$ar} = 1;
- $new_order->{$individual}->{$gene_symbol}->{$ar}->{$tr_stable_id}->{$vf_name} = 1;
- }
- # heterozygous
- # we need to cache that we've observed one
- elsif (uc($zyg) eq 'HET') {
- if (scalar keys %{$cache->{$individual}->{$tr_stable_id}} >= 1) {
- $complete_genes->{$gene_symbol}->{$individual}->{$tr_stable_id} = 1;
- $acting_ars->{$gene_symbol}->{$individual}->{$ar} = 1;
- $new_order->{$individual}->{$gene_symbol}->{$ar}->{$tr_stable_id}->{$vf_name} = 1;
- # add first observed het variant to the list
- foreach my $vf (keys %{$cache->{$individual}->{$tr_stable_id}}) {
- $new_order->{$individual}->{$gene_symbol}->{$ar}->{$tr_stable_id}->{$vf} = 1;
- }
- }
- $cache->{$individual}->{$tr_stable_id}->{$vf_name}++;
- }
- }
- # monoallelic genes require only one allele
- elsif ($ar eq 'monoallelic' || $ar eq 'x-linked dominant' || $ar eq 'hemizygous' || $ar eq 'x-linked over-dominance') {
- $complete_genes->{$gene_symbol}->{$individual}->{$tr_stable_id} = 1;
- $acting_ars->{$gene_symbol}->{$individual}->{$ar} = 1;
- $new_order->{$individual}->{$gene_symbol}->{$ar}->{$tr_stable_id}->{$vf_name} = 1;
- }
- }
- } elsif (/^G2P_flag/) {
- my ($flag, $gene_symbol, $tr_stable_id, $individual, $vf_name, $g2p_data) = split/\t/;
- $genes->{$gene_symbol}->{"$individual\t$vf_name"}->{$tr_stable_id} = $g2p_data;
- $individuals->{$individual}->{$gene_symbol}->{$vf_name}->{$tr_stable_id} = $g2p_data;
- } else {
-
- }
- }
- $fh->close();
- }
- return {
- genes => $genes,
- individuals => $individuals,
- complete_genes => $complete_genes,
- g2p_list => $g2p_list,
- in_vcf_file => $in_vcf_file,
- acting_ars => $acting_ars,
- new_order => $new_order,
- };
-}
-
-
-sub stats_html_head {
- my $charts = shift;
-
- my $html =<
-
- VEP summary
-
-
-
-
-SHTML
- return $html;
-}
-
-sub stats_html_tail {
- my $script =<
-
-
-
-SHTML
- return "\n \n$script\n\n