annotate make_EAR.py @ 2:0efed25f6d38 draft default tip

planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
author bgruening
date Tue, 15 Oct 2024 12:52:44 +0000
parents 82450f7907ef
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
2 import argparse
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
3 import logging
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
4 import math
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
5 import os
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
6 import re
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
7 import sys
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
8 from datetime import datetime
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
9
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
10 import pytz
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
11 import requests
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
12 import yaml
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
13 from reportlab.lib import colors
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
14 from reportlab.lib.pagesizes import A4
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
15 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
16 from reportlab.lib.units import cm
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
17 from reportlab.platypus import Image, PageBreak, Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
18
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
19
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
20 # make_EAR_glxy.py
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
21 # CAUTION: This is for the Galaxy version!
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
22 # by Diego De Panis
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
23 # ERGA Sequencing and Assembly Committee
2
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
24 EAR_version = "v24.10.15"
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
25
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
26
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
27 def make_report(yaml_file):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
28 logging.basicConfig(filename='EAR.log', level=logging.INFO)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
29 # Read the content from EAR.yaml file
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
30 with open(yaml_file, "r") as file:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
31 yaml_data = yaml.safe_load(file)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
32
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
33 # FUNCTIONS ###################################################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
34
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def format_number(value):
    """Format a numeric value with thousands separators.

    Whole numbers are rendered without a decimal part (``"1,234"``); any
    other finite or non-finite float is rendered with ``","`` grouping
    (``"1,234.5"``).

    Args:
        value: A number or a string that ``float()`` can parse.

    Returns:
        The formatted string, or ``value`` itself, unchanged, when it cannot
        be converted to a float.
    """
    try:
        value_float = float(value)
    except (TypeError, ValueError, OverflowError):
        # ValueError: non-numeric text such as "N/A".
        # TypeError: None or containers (e.g. a field missing from the YAML).
        # OverflowError: an int too large to be represented as a float.
        return value

    if value_float.is_integer():
        # format as integer if no decimal
        return f'{int(value_float):,}'
    # format as a float
    return f'{value_float:,}'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
47
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
48 # extract gfastats values
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def extract_gfastats_values(content, keys):
    """Look up the value of each requested field in a gfastats report.

    Two layouts are tried for every key, in this order:
    ``"<key>: <value>"`` (colon-separated) and ``"<key>\\t<value>"``
    (tab-separated).

    Args:
        content: Full text of the gfastats report.
        keys: Field names to look up; they are matched literally.

    Returns:
        A list with one entry per key, in the same order: the stripped
        value text, or ``"N/A"`` when the key is absent or its value is
        blank.
    """
    values = []
    for key in keys:
        escaped = re.escape(key)
        # colon-separated as default format first.
        # Only spaces/tabs may separate the key from its value and the value
        # must contain a non-blank character, so an empty field can never
        # pick up the text of the following line.
        match = re.search(rf"{escaped}:[ \t]*(\S.*)", content)
        if not match:
            # If not try galaxy's tab-separated
            match = re.search(rf"{escaped}\t[ \t]*(\S.*)", content)
        values.append(match.group(1).strip() if match else "N/A")
    return values
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
62
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
# gfastats field names used by this report.
keys = [
    "Total scaffold length",
    "GC content %",
    "# gaps in scaffolds",
    "Total gap length in scaffolds",
    "# scaffolds",
    "Scaffold N50",
    "Scaffold L50",
    "Scaffold L90",
    "# contigs",
    "Contig N50",
    "Contig L50",
    "Contig L90",
]

# Shorter labels for some fields; any field not listed keeps its gfastats name.
_display_name_overrides = {
    "Total scaffold length": "Total bp",
    "GC content %": "GC %",
    "Total gap length in scaffolds": "Total gap bp",
    "# scaffolds": "Scaffolds",
    "# contigs": "Contigs",
}
display_names = [_display_name_overrides.get(key, key) for key in keys]

# Positions of two fields within `keys`.
total_length_index = keys.index("Total scaffold length")
gaps_index = keys.index("# gaps in scaffolds")

exclusion_list = ["# gaps in scaffolds"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
88
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
89 # extract Total bp from gfastats report
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def extract_total_bp_from_gfastats(gfastats_path):
    """Read the "Total scaffold length" field from a gfastats report file.

    The colon-separated layout (``"Total scaffold length: N"``) is tried
    first, then the tab-separated one (``"Total scaffold length\\tN"``).

    Args:
        gfastats_path: Path of the gfastats report to read.

    Returns:
        The length formatted with thousands separators (e.g.
        ``"1,234,567"``), or ``"N/A"`` when the field is missing or its
        value is not an integer. Both failure cases are logged as errors.

    Raises:
        OSError: If the report file cannot be opened.
    """
    with open(gfastats_path, "r") as f:
        content = f.read()

    # Try colon-separated format first.
    # Only spaces/tabs may precede the value, so a blank field cannot
    # swallow the next line of the report.
    match = re.search(r"Total scaffold length:[ \t]*(\S.*)", content)
    if not match:
        # If not found, try tab-separated format
        match = re.search(r"Total scaffold length\t[ \t]*(\S.*)", content)
    if not match:
        logging.error(f"Could not find Total scaffold length in {gfastats_path}")
        return "N/A"

    # Drop any existing grouping commas before re-formatting.
    total_bp = match.group(1).replace(',', '')
    try:
        return "{:,}".format(int(total_bp))
    except ValueError:
        # Degrade the same way as a missing field instead of aborting the report.
        logging.error(f"Total scaffold length in {gfastats_path} is not an integer: {total_bp!r}")
        return "N/A"
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
104
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
105 # compute EBP quality metric
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def compute_ebp_metric(haplotype, gfastats_path, qv_value):
    """Build the EBP quality metric sentence for one haplotype.

    The metric has the form ``<c>.<s>.Q<q>`` where ``<c>`` and ``<s>`` are
    the floored base-10 logarithms of the "Contig N50" and "Scaffold N50"
    values found in the gfastats report, and ``<q>`` is the floored QV.

    Args:
        haplotype: Label inserted into the returned sentence.
        gfastats_path: Path of the gfastats report to read.
        qv_value: QV score; anything ``float()`` can parse.

    Returns:
        ``"Obtained EBP quality metric for <haplotype>: <c>.<s>.Q<q>"``.

    Raises:
        ValueError: If an N50 value or ``qv_value`` is not numeric (for
            example ``"N/A"`` or ``""``), or an N50 value is not positive.
        OSError: If the report file cannot be opened.
    """
    def _order_of_magnitude(raw):
        # "12,345,678" -> 7
        return math.floor(math.log10(int(raw.replace(',', ''))))

    with open(gfastats_path, "r") as report:
        report_text = report.read()

    contig_n50, scaffold_n50 = extract_gfastats_values(report_text, ["Contig N50", "Scaffold N50"])
    contig_n50_log = _order_of_magnitude(contig_n50)
    scaffold_n50_log = _order_of_magnitude(scaffold_n50)
    qv_floor = math.floor(float(qv_value))
    return f"Obtained EBP quality metric for {haplotype}: {contig_n50_log}.{scaffold_n50_log}.Q{qv_floor}"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
116
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
117 # extract qv values
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def get_qv_value(file_path, order, tool, haplotype):
    """Return the fourth tab-separated column of one line of a QV file.

    A value is only returned when the file has more than ``order`` lines
    and either consists of a single line or has ``"Both"`` as the first
    column of its third line.

    Args:
        file_path: Path of the tab-separated file to read.
        order: Zero-based index of the line to read.
        tool: Tool name, used only in the error log message.
        haplotype: Haplotype label, used only in the error log message.

    Returns:
        The raw (unstripped) fourth column of the selected line; ``None``
        when the conditions above are not met; ``''`` when any exception
        occurs (it is logged as an error). Note that a two-line file with
        ``order`` below 2 takes the exception path, because its third line
        does not exist.
    """
    try:
        with open(file_path, 'r') as handle:
            rows = handle.readlines()

        if len(rows) <= order:
            return None
        # Multi-line files must carry a "Both" row in third position.
        if len(rows) != 1 and rows[2].split('\t')[0].strip() != "Both":
            return None
        return rows[order].split('\t')[3]
    except Exception as e:
        logging.error(f"Error reading {file_path} for tool {tool} and haplotype {haplotype}: {str(e)}")
        return ''
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
129
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
130 # extract Kmer completeness values
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def get_completeness_value(file_path, order, tool, haplotype):
    """Return the fifth tab-separated column of one line of a completeness file.

    Args:
        file_path: Path of the tab-separated file to read.
        order: Zero-based index of the line to read.
        tool: Tool name, used only in the error log message.
        haplotype: Haplotype label, used only in the error log message.

    Returns:
        The stripped fifth column of the selected line; ``None`` when the
        file does not have more than ``order`` lines; ``''`` when any
        exception occurs (it is logged as an error).
    """
    try:
        with open(file_path, 'r') as handle:
            rows = handle.readlines()

        if len(rows) <= order:
            return None
        columns = rows[order].split('\t')
        return columns[4].strip()
    except Exception as e:
        logging.error(f"Error reading {file_path} for tool {tool} and haplotype {haplotype}: {str(e)}")
        return ''
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
142
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
143 # get unique part in file names
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def find_unique_parts(file1, file2):
    """Return the dot-separated parts that appear in only one of two names.

    Args:
        file1: First file name.
        file2: Second file name.

    Returns:
        A 2-tuple of strings: the parts found only in ``file1`` and the
        parts found only in ``file2``, each joined with single spaces in
        their original order (an empty string when nothing is unique).
    """
    first_tokens = file1.split('.')
    second_tokens = file2.split('.')
    first_seen, second_seen = set(first_tokens), set(second_tokens)

    only_in_first = ' '.join(tok for tok in first_tokens if tok not in second_seen)
    only_in_second = ' '.join(tok for tok in second_tokens if tok not in first_seen)
    return only_in_first, only_in_second
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
152
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
153 # extract BUSCO values
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def extract_busco_values(file_path):
    """Extract the S, D, F and M percentages from a BUSCO summary file.

    The file is searched for the first line fragment of the form
    ``C:...n:<digits>`` (e.g. ``C:98.6%[S:97.9%,D:0.7%],F:0.5%,M:0.9%,n:3640``)
    and the ``S:``, ``D:``, ``F:`` and ``M:`` percentages are read from it.
    Percentages with or without a decimal part are accepted.

    Args:
        file_path: Path of the BUSCO summary text file.

    Returns:
        A 4-tuple of strings ``(S, D, F, M)``, each including the trailing
        ``%``. If the file cannot be read, or the summary line or any of
        the four values is missing, a warning is logged and
        ``('', '', '', '')`` is returned.
    """
    empty = ('', '', '', '')

    try:
        with open(file_path, 'r') as file:
            content = file.read()
    except Exception as e:
        # Deliberately best-effort: a missing or unreadable file must not
        # abort the caller, it only leaves the BUSCO values blank.
        logging.warning(f"Error reading {file_path}: {str(e)}")
        return empty

    summary = re.search(r"C:.*n:\d+", content)
    if summary is None:
        logging.warning(
            f"Error reading {file_path}: BUSCO summary line not found"
        )
        return empty
    results_line = summary.group(0)

    values = []
    for label in "SDFM":
        # The decimal part is optional so that e.g. "100%" is accepted.
        found = re.search(rf"{label}:(\d+(?:\.\d+)?%)", results_line)
        if found is None:
            logging.warning(
                f"Error reading {file_path}: "
                f"BUSCO {label} value not found in summary line"
            )
            return empty
        values.append(found.group(1))

    return tuple(values)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
167
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
168 # extract BUSCO info
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def extract_busco_info(file_path):
    """Read run metadata from a BUSCO summary file.

    Four independent patterns are searched in the file text: the BUSCO
    version, the lineage dataset description, the run mode and the gene
    predictor.

    Args:
        file_path: Path of the BUSCO summary text file.

    Returns:
        A 4-tuple ``(busco_version, lineage_info, busco_mode, busco_pred)``.
        ``lineage_info`` is a 3-tuple of strings: dataset name, number of
        genomes/species, and number of BUSCOs. Any item whose pattern is
        not found is ``None``. If reading fails, a warning is logged and
        the items gathered so far (``None`` by default) are returned.
    """
    busco_version = lineage_info = busco_mode = busco_pred = None

    try:
        with open(file_path, 'r') as handle:
            text = handle.read()

        version_hit = re.search(r"# BUSCO version is: ([\d.]+)", text)
        busco_version = version_hit.group(1) if version_hit else None

        lineage_hit = re.search(r"The lineage dataset is: (.*?) \(Creation date:.*?, number of (genomes|species): (\d+), number of BUSCOs: (\d+)\)", text)
        # Group 2 only captures the word "genomes"/"species"; it is skipped.
        lineage_info = lineage_hit.group(1, 3, 4) if lineage_hit else None

        mode_hit = re.search(r"# BUSCO was run in mode: (\w+)", text)
        busco_mode = mode_hit.group(1) if mode_hit else None

        predictor_hit = re.search(r"# Gene predictor used: (\w+)", text)
        busco_pred = predictor_hit.group(1) if predictor_hit else None

    except Exception as e:
        logging.warning(f"Error reading {file_path}: {str(e)}")

    return busco_version, lineage_info, busco_mode, busco_pred
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
195
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
196 # Function to check and generate warning messages
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def generate_warning_paragraphs(expected, observed, trait):
    """Build warning paragraphs for an expected-vs-observed trait mismatch.

    The comparison depends on ``trait``:

    * ``"Haploid size (bp)"``: both values are parsed as integers after
      removing commas; a warning is produced when they differ by more
      than 20% of the expected value.
    * ``"Haploid Number"`` / ``"Ploidy"``: both values are parsed as
      integers; a warning is produced when they are not equal.
    * ``"Sample Sex"``: values are compared after stripping whitespace
      and lower-casing; a warning is produced when they differ.

    Any other trait produces no warning.

    Args:
        expected: Expected value of the trait.
        observed: Observed value of the trait.
        trait: Name of the trait being compared.

    Returns:
        A list holding zero or one ``Paragraph`` built with
        ``styles["midiStyle"]``. Any exception raised during the
        comparison is logged as a warning and yields an empty list.
        ``Paragraph`` and ``styles`` are not defined in this function --
        presumably module-level names defined elsewhere in the file.
    """
    collected = []
    try:
        note = None

        if trait == "Haploid size (bp)":
            want = int(expected.replace(',', ''))
            got = int(observed.replace(',', ''))
            relative_gap = abs(want - got) / want
            if relative_gap > 0.20:
                note = f". Observed {trait} has >20% difference with Expected"
        elif trait in ("Haploid Number", "Ploidy"):
            # Compare as integers so that e.g. "2" and " 2" agree.
            if int(expected) != int(observed):
                note = f". Observed {trait} is different from Expected"
        elif trait == "Sample Sex":
            if expected.strip().lower() != observed.strip().lower():
                note = ". Observed sex is different from Sample sex"

        if note is not None:
            collected.append(Paragraph(note, styles["midiStyle"]))
    except Exception as e:
        logging.warning(f"Error in generating warning for {trait}: {str(e)}")

    return collected
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
222
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
223 # Generate warnings for curated haplotypes (qv, kcomp, busco)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def generate_curated_warnings(haplotype, qv_value, completeness_value, busco_scores):
    """Build quality warnings (QV, k-mer completeness, BUSCO) for a haplotype.

    All four inputs are converted to floats first; the thresholds are
    checked only if every conversion succeeds.

    Args:
        haplotype: Label inserted into each warning message.
        qv_value: QV value; warned about when below 40.
        completeness_value: K-mer completeness; warned about when below 90.
        busco_scores: Indexable whose item 0 is the BUSCO single-copy
            percentage and item 1 the duplicated percentage, as strings
            with an optional trailing ``%``. Warned about when single copy
            is below 90 or duplicated is above 5.

    Returns:
        A list of ``Paragraph`` objects built with ``styles["midiStyle"]``,
        one per threshold violated. Any exception is logged as a warning
        and the paragraphs collected up to that point are returned.
        ``Paragraph`` and ``styles`` are not defined in this function --
        presumably module-level names defined elsewhere in the file.
    """
    collected = []

    def flag(text):
        collected.append(Paragraph(text, styles["midiStyle"]))

    try:
        # Parse everything up front: one bad value suppresses all checks.
        quality = float(qv_value)
        kmer_completeness = float(completeness_value)
        single_copy = float(busco_scores[0].rstrip('%'))
        duplicated = float(busco_scores[1].rstrip('%'))

        if quality < 40:
            flag(f". QV value is less than 40 for {haplotype}")

        if kmer_completeness < 90:
            flag(f". Kmer completeness value is less than 90 for {haplotype}")

        if single_copy < 90:
            flag(f". BUSCO single copy value is less than 90% for {haplotype}")

        if duplicated > 5:
            flag(f". BUSCO duplicated value is more than 5% for {haplotype}")

    except Exception as e:
        logging.warning(f"Error in generating warnings for {haplotype}: {str(e)}")

    return collected
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
257
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
258 # Generate warnings for curated haplotypes (loss, gaps, 90inChrom)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def generate_assembly_warnings(asm_data, gaps_per_gbp_data, obs_haploid_num):
    """Build assembly-level warnings (length loss, gaps, L90) per haplotype.

    For every haplotype in the module-level ``asm_stages`` three checks are
    made on the curated assembly:

    * more than 3% of the pre-curation total length was lost,
    * more than 1000 gaps per Gbp,
    * the scaffold L90 exceeds the observed haploid number.

    Args:
        asm_data: Nested mapping indexed as
            ``asm_data[stage][haplotype]['gfastats--nstar-report_txt']``
            for the stages ``'Pre-curation'`` and ``'Curated'``; the value
            is handed to ``extract_total_bp_from_gfastats``.
        gaps_per_gbp_data: Mapping from ``('Curated', haplotype)`` to the
            gaps-per-Gbp figure; a missing key counts as 0.
        obs_haploid_num: Observed haploid number, convertible to float.

    Returns:
        A list of ``Paragraph`` objects built with ``styles["midiStyle"]``.

    Note:
        Relies on ``asm_stages``, ``gfastats_data``, ``display_names``,
        ``extract_total_bp_from_gfastats``, ``Paragraph`` and ``styles``,
        none of which are defined in this function. Exceptions are not
        caught here and propagate to the caller.
    """
    flagged = []
    report_key = 'gfastats--nstar-report_txt'

    def flag(text):
        flagged.append(Paragraph(text, styles["midiStyle"]))

    for haplotype in asm_stages:
        pre_total = extract_total_bp_from_gfastats(asm_data['Pre-curation'][haplotype][report_key])
        curated_total = extract_total_bp_from_gfastats(asm_data['Curated'][haplotype][report_key])

        curated_row = gfastats_data[('Curated', haplotype)]
        l90_text = curated_row[display_names.index('Scaffold L90')]
        l90 = float(l90_text.replace(',', ''))

        # Length lost during curation, as a percentage of the original.
        if pre_total and curated_total:
            before = float(pre_total.replace(',', ''))
            after = float(curated_total.replace(',', ''))
            lost_pct = (before - after) / before * 100
            if lost_pct > 3:
                flag(f". Assembly length loss > 3% for {haplotype}")

        if gaps_per_gbp_data.get(('Curated', haplotype), 0) > 1000:
            flag(f". More than 1000 gaps/Gbp for {haplotype}")

        # L90 above the haploid number means 90% of the assembly is not
        # contained in that many scaffolds.
        if l90 > float(obs_haploid_num):
            flag(f". Not 90% of assembly in chromosomes for {haplotype}")

    return flagged
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
284
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
285 # Parse pipeline and generate "tree"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
286 def generate_pipeline_tree(pipeline_data):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
287 tree_lines = []
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
288 indent = "&nbsp;" * 2 # Adjust indent spacing
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
289
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
290 if isinstance(pipeline_data, dict):
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
291 for tool, version_param in pipeline_data.items():
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
292 # Tool line
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
293 tool_line = f"- <b>{tool}</b>"
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
294 tree_lines.append(tool_line)
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
295
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
296 # Convert version_param to string and split
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
297 version_param_str = str(version_param)
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
298 parts = version_param_str.split('/')
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
299 version = parts[0]
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
300 params = [p for p in parts[1:] if p] # This will remove empty strings
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
301
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
302 # Version line
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
303 version_line = f"{indent * 2}|_ <i>ver:</i> {version}"
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
304 tree_lines.append(version_line)
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
305
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
306 # Param line(s)
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
307 if params:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
308 for param in params:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
309 param_line = f"{indent * 2}|_ <i>key param:</i> {param}"
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
310 tree_lines.append(param_line)
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
311 else:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
312 param_line = f"{indent * 2}|_ <i>key param:</i> NA"
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
313 tree_lines.append(param_line)
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
314 else:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
315 tree_lines.append("Invalid pipeline data format")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
316
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
317 # Join lines with HTML break for paragraph
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
318 tree_diagram = "<br/>".join(tree_lines)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
319 return tree_diagram
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
320
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
321 # Reading SAMPLE INFORMATION section from yaml ################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
322
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
323 # Check for required fields
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
324 required_fields = ["ToLID", "Species", "Sex", "Submitter", "Affiliation", "Tags"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
325 missing_fields = [field for field in required_fields if field not in yaml_data or not yaml_data[field]]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
326
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
327 if missing_fields:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
328 logging.error(f"# GENERAL INFORMATION section in the yaml file is missing or empty for the following information: {', '.join(missing_fields)}")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
329 sys.exit(1)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
330
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
331 # Check that "Species" field is a string
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
332 if not isinstance(yaml_data["Species"], str):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
333 logging.error(f"# GENERAL INFORMATION section in the yaml file contains incorrect data type for 'Species'. Expected 'str' but got '{type(yaml_data['Species']).__name__}'.")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
334 sys.exit(1)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
335
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
336 # Get data for Header, ToLID table and submitter
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
337 tol_id = yaml_data["ToLID"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
338 species = yaml_data["Species"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
339 sex = yaml_data["Sex"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
340 submitter = yaml_data["Submitter"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
341 affiliation = yaml_data["Affiliation"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
342 tags = yaml_data["Tags"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
343
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
344 # Check if tag is valid
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
345 valid_tags = ["ERGA-BGE", "ERGA-Pilot", "ERGA-Community", "ERGA-testing"]
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
346 if tags not in valid_tags:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
347 tags += "[INVALID TAG]"
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
348 logging.warning("# SAMPLE INFORMATION section in the yaml file contains an invalid tag. Valid tags are ERGA-BGE, ERGA-Pilot and ERGA-Community.")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
349
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
350 # Get data from GoaT based on species name
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
351 # URL-encode the species name so that spaces and special characters are safe in the query string
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
352 species_name = requests.utils.quote(species)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
353
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
354 # Query the GoaT search API for taxon records matching the species name
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
355 goat_response = requests.get(f'https://goat.genomehubs.org/api/v2/search?query=tax_name%28{species_name}%29&result=taxon')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
356 goat_data = goat_response.json() # convert json to dict
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
357
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
358 taxon_number = goat_data['results'][0]['result']['taxon_id']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
359
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
360 goat_results = goat_data['results']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
361
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
362 class_name = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
363 order_name = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
364 haploid_number = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
365 haploid_source = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
366 ploidy = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
367 ploidy_source = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
368
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
369 for result in goat_results:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
370 lineage = result['result']['lineage']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
371 for node in lineage:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
372 if node['taxon_rank'] == 'class':
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
373 class_name = node['scientific_name']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
374 if node['taxon_rank'] == 'order':
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
375 order_name = node['scientific_name']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
376
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
377 goat_second_response = requests.get(f'https://goat.genomehubs.org/api/v2/record?recordId={taxon_number}&result=taxon&taxonomy=ncbi')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
378 goat_second_data = goat_second_response.json()
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
379
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
380 ploidy_info = goat_second_data['records'][0]['record']['attributes']['ploidy']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
381
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
382 ploidy = ploidy_info['value']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
383 ploidy_source = ploidy_info['aggregation_source']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
384
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
385 haploid_info = goat_second_data['records'][0]['record']['attributes']['haploid_number']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
386
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
387 haploid_number = haploid_info['value']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
388 haploid_source = haploid_info['aggregation_source']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
389
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
390 sp_data = [
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
391 ["TxID", "ToLID", "Species", "Class", "Order"],
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
392 [taxon_number, tol_id, species, class_name, order_name]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
393 ]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
394
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
395 # Transpose the data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
396 transposed_sp_data = list(map(list, zip(*sp_data)))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
397
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
398 # Reading SEQUENCING DATA section from yaml ###################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
399
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
400 # get DATA section from yaml
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
401 data_list = yaml_data.get('DATA', [])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
402
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
403 # Prepare headers
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
404 headers = ['Data']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
405 data_values = ['Coverage']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
406
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
407 # Extract data from YAML and format it for the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
408 for item in data_list:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
409 for technology, coverage in item.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
410 headers.append(technology)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
411 data_values.append('NA' if not coverage else coverage)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
412
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
413 # Create a list of lists for the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
414 table_data = [headers, data_values]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
415
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
416 # Extract pipeline data
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
417 asm_pipeline_data = yaml_data.get('PIPELINES', {}).get('Assembly', {})
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
418 curation_pipeline_data = yaml_data.get('PIPELINES', {}).get('Curation', {})
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
419
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
420 # Build the tree diagrams for the Assembly and Curation pipelines
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
421 asm_pipeline_tree = generate_pipeline_tree(asm_pipeline_data)
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
422 curation_pipeline_tree = generate_pipeline_tree(curation_pipeline_data)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
423
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
424 # Reading GENOME PROFILING DATA section from yaml #############################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
425 profiling_data = yaml_data.get('PROFILING')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
426
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
427 # Check if profiling_data is available
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
428 if not profiling_data:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
429 logging.error('Error: No profiling data found in the YAML file.')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
430 sys.exit(1)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
431
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
432 # Check for GenomeScope data (mandatory)
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
433 genomescope_data = profiling_data.get('GenomeScope')
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
434 if not genomescope_data:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
435 logging.error("Error: GenomeScope data is missing in the YAML file. This is mandatory.")
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
436 sys.exit(1)
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
437
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
438 genomescope_summary = genomescope_data.get('genomescope_summary_txt')
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
439 if not genomescope_summary:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
440 logging.error("Error: GenomeScope summary file path is missing in the YAML file.")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
441 sys.exit(1)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
442
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
443 # Read the content of the GenomeScope summary file
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
444 try:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
445 with open(genomescope_summary, "r") as f:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
446 summary_txt = f.read()
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
447 # Extract values from summary.txt
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
448 genome_haploid_length = re.search(r"Genome Haploid Length\s+([\d,]+) bp", summary_txt).group(1)
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
449 proposed_ploidy = re.search(r"p = (\d+)", summary_txt).group(1)
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
450 except Exception as e:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
451 logging.error(f"Error reading GenomeScope summary file: {str(e)}")
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
452 sys.exit(1)
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
453
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
454 # Check for Smudgeplot data (optional)
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
455 smudgeplot_data = profiling_data.get('Smudgeplot')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
456 if smudgeplot_data:
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
457 smudgeplot_summary = smudgeplot_data.get('smudgeplot_verbose_summary_txt')
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
458 if smudgeplot_summary:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
459 try:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
460 with open(smudgeplot_summary, "r") as f:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
461 smud_summary_txt = f.readlines()
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
462 for line in smud_summary_txt:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
463 if line.startswith("* Proposed ploidy"):
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
464 proposed_ploidy = line.split(":")[1].strip()
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
465 break
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
466 except Exception as e:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
467 logging.warning(f"Error reading Smudgeplot summary file: {str(e)}. Using GenomeScope ploidy.")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
468 else:
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
469 logging.warning("Smudgeplot summary file path is missing. Using GenomeScope ploidy.")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
470 else:
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
471 logging.info("Smudgeplot data not provided. Using GenomeScope ploidy.")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
472
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
473 # Reading ASSEMBLY DATA section from yaml #####################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
474
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
475 asm_data = yaml_data.get('ASSEMBLIES', {})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
476
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
477 # make a list from the assemblies available in asm_data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
478 asm_stages = []
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
479 for asm_stage, stage_properties in asm_data.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
480 for haplotypes in stage_properties.keys():
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
481 if haplotypes not in asm_stages:
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
482 asm_stages.append(haplotypes)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
483
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
484 # get gfastats-based data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
485 gfastats_data = {}
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
486 for asm_stage, stage_properties in asm_data.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
487 for haplotypes, haplotype_properties in stage_properties.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
488 if isinstance(haplotype_properties, dict):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
489 if 'gfastats--nstar-report_txt' in haplotype_properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
490 file_path = haplotype_properties['gfastats--nstar-report_txt']
2
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
491 try:
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
492 with open(file_path, 'r') as file:
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
493 content = file.read()
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
494 gfastats_data[(asm_stage, haplotypes)] = extract_gfastats_values(content, keys)
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
495 except FileNotFoundError:
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
496 logging.error(f"Gfastats file not found: {file_path}")
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
497 except Exception as e:
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
498 logging.error(f"Error processing gfastats file {file_path}: {str(e)}")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
499
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
500 gaps_per_gbp_data = {}
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
501 for (asm_stage, haplotypes), values in gfastats_data.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
502 try:
2
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
503 gaps = float(values[gaps_index].replace(',', ''))
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
504 total_length = float(values[total_length_index].replace(',', ''))
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
505 if total_length > 0:
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
506 gaps_per_gbp = round((gaps / total_length * 1_000_000_000), 2)
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
507 gaps_per_gbp_data[(asm_stage, haplotypes)] = gaps_per_gbp
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
508 else:
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
509 logging.warning(f"Total length is zero for {asm_stage} {haplotypes}")
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
510 gaps_per_gbp_data[(asm_stage, haplotypes)] = 'N/A'
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
511 except (ValueError, IndexError):
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
512 logging.warning(f"Could not calculate gaps per Gbp for {asm_stage} {haplotypes}")
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
513 gaps_per_gbp_data[(asm_stage, haplotypes)] = 'N/A'
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
514
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
515 # Define the contigging table (column names)
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
516 asm_table_data = [["Metrics"] + [f'{asm_stage} \n {haplotypes}' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
517
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
518 # Fill the table with the gfastats data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
519 for i in range(len(display_names)):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
520 metric = display_names[i]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
521 if metric not in exclusion_list:
2
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
522 asm_table_data.append([metric] + [format_number(gfastats_data.get((asm_stage, haplotypes), ['N/A'] * len(keys))[i]) if (asm_stage, haplotypes) in gfastats_data else 'N/A' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
523
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
524 # Add the gaps/gbp in between
2
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
525 asm_table_data.insert(gaps_index + 1, ['Gaps/Gbp'] + [format_number(gaps_per_gbp_data.get((asm_stage, haplotypes), 'N/A')) for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
526 # get QV, Kmer completeness and BUSCO data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
527 qv_data = {}
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
528 completeness_data = {}
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
529 busco_data = {metric: {} for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']}
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
530 for asm_stage, stage_properties in asm_data.items():
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
531 asm_stage_elements = list(stage_properties.keys())
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
532 for i, haplotypes in enumerate(asm_stage_elements):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
533 haplotype_properties = stage_properties[haplotypes]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
534 if isinstance(haplotype_properties, dict):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
535 if 'merqury_qv' in haplotype_properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
536 qv_data[(asm_stage, haplotypes)] = get_qv_value(haplotype_properties['merqury_qv'], i, asm_stage, haplotypes)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
537 if 'merqury_completeness_stats' in haplotype_properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
538 completeness_data[(asm_stage, haplotypes)] = get_completeness_value(haplotype_properties['merqury_completeness_stats'], i, asm_stage, haplotypes)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
539 if 'busco_short_summary_txt' in haplotype_properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
540 s_value, d_value, f_value, m_value = extract_busco_values(haplotype_properties['busco_short_summary_txt'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
541 busco_data['BUSCO sing.'].update({(asm_stage, haplotypes): s_value})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
542 busco_data['BUSCO dupl.'].update({(asm_stage, haplotypes): d_value})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
543 busco_data['BUSCO frag.'].update({(asm_stage, haplotypes): f_value})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
544 busco_data['BUSCO miss.'].update({(asm_stage, haplotypes): m_value})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
545
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
546 # Fill the table with the QV data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
547 asm_table_data.append(['QV'] + [qv_data.get((asm_stage, haplotypes), '') for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
548
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
549 # Fill the table with the Kmer completeness data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
550 asm_table_data.append(['Kmer compl.'] + [completeness_data.get((asm_stage, haplotypes), '') for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
551
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
552 # Fill the table with the BUSCO data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
553 for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
554 asm_table_data.append([metric] + [busco_data[metric].get((asm_stage, haplotypes), '') for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
555
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
556 # Reading CURATION NOTES section from yaml ####################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
557
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
558 obs_haploid_num = yaml_data.get("NOTES", {}).get("Obs_Haploid_num", "NA")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
559 obs_sex = yaml_data.get("NOTES", {}).get("Obs_Sex", "NA")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
560 interventions_per_gb = yaml_data.get("NOTES", {}).get("Interventions_per_Gb", "NA")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
561 contamination_notes = yaml_data.get("NOTES", {}).get("Contamination_notes", "NA")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
562 other_notes = yaml_data.get("NOTES", {}).get("Other_notes", "NA")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
563
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
564 # Extract Total bp for each haplotype and find the maximum
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
565 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
566 total_bp_values = []
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
567 for haplotype, properties in curated_assemblies.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
568 if 'gfastats--nstar-report_txt' in properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
569 total_bp = extract_total_bp_from_gfastats(properties['gfastats--nstar-report_txt'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
570 total_bp_values.append(total_bp)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
571
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
572 max_total_bp = max(total_bp_values, default='NA')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
573
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
574 # Create table data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
575 genome_traits_table_data = [
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
576 ["Genome Traits", "Expected", "Observed"],
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
577 ["Haploid size (bp)", genome_haploid_length, f"{max_total_bp}"],
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
578 ["Haploid Number", f"{haploid_number} (source: {haploid_source})", obs_haploid_num],
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
579 ["Ploidy", f"{ploidy} (source: {ploidy_source})", proposed_ploidy],
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
580 ["Sample Sex", sex, obs_sex]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
581 ]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
582
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
583 # Get curator notes
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
584 curator_notes_text = (
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
585 f". Interventions/Gb: {interventions_per_gb}<br/>"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
586 f". Contamination notes: &quot;{contamination_notes}&quot;<br/>"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
587 f". Other observations: &quot;{other_notes}&quot;"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
588 )
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
589
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
590 # PDF CONSTRUCTION ############################################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
591
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
592 # Set up the PDF file
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
593 pdf_filename = "EAR.pdf"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
594 margin = 0.5 * 72 # 0.5 inch in points (normal margin is 1 inch)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
595 pdf = SimpleDocTemplate(pdf_filename,
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
596 pagesize=A4,
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
597 leftMargin=margin,
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
598 rightMargin=margin,
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
599 topMargin=margin,
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
600 bottomMargin=margin)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
601 elements = []
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
602
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
603 # Set all the styles
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
604 styles = getSampleStyleSheet()
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
605 styles.add(ParagraphStyle(name='TitleStyle', fontName='Courier', fontSize=20))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
606 styles.add(ParagraphStyle(name='subTitleStyle', fontName='Courier', fontSize=16))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
607 styles.add(ParagraphStyle(name='normalStyle', fontName='Courier', fontSize=12))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
608 styles.add(ParagraphStyle(name='midiStyle', fontName='Courier', fontSize=10))
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
609 # styles.add(ParagraphStyle(name='LinkStyle', fontName='Courier', fontSize=10, textColor='blue', underline=True))
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
610 styles.add(ParagraphStyle(name='treeStyle', fontName='Courier', fontSize=10, leftIndent=12))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
611 styles.add(ParagraphStyle(name='miniStyle', fontName='Courier', fontSize=8))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
612 styles.add(ParagraphStyle(name='FileNameStyle', fontName='Courier', fontSize=6))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
613
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
614 # PDF SECTION 1 -------------------------------------------------------------------------------
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
615
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
616 # Add the title
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
617 title = Paragraph("ERGA Assembly Report", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
618 elements.append(title)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
619
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
620 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
621 elements.append(Spacer(1, 12))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
622
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
623 # Add version
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
624 ver_paragraph = Paragraph(EAR_version, styles['normalStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
625 elements.append(ver_paragraph)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
626
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
627 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
628 elements.append(Spacer(1, 12))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
629
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
630 # Add tags
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
631 tags_paragraph = Paragraph(f"Tags: {tags}", styles['normalStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
632 elements.append(tags_paragraph)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
633
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
634 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
635 elements.append(Spacer(1, 24))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
636
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
637 # Create the SPECIES DATA table with the transposed data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
638 sp_data_table = Table(transposed_sp_data)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
639
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
640 # Style the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
641 sp_data_table.setStyle(TableStyle([
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
642 ("BACKGROUND", (0, 0), (0, -1), '#e7e7e7'), # Grey background for column 1
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
643 ("BACKGROUND", (1, 0), (1, -1), colors.white), # White background for column 2
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
644 ("ALIGN", (0, 0), (-1, -1), "CENTER"),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
645 ('FONTNAME', (0, 0), (0, 0), 'Courier'), # Regular font for row1, col1
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
646 ('FONTNAME', (1, 0), (1, 0), 'Courier'),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
647 ('FONTNAME', (0, 1), (-1, -1), 'Courier'), # Regular font for the rest of the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
648 ('FONTNAME', (1, 1), (1, 1), 'Courier-Bold'), # Bold font for row2, col2
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
649 ("FONTSIZE", (0, 0), (-1, -1), 14),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
650 ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
651 ("GRID", (0, 0), (-1, -1), 0.5, colors.black)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
652 ]))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
653
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
654 # Add SPECIES DATA table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
655 elements.append(sp_data_table)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
656
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
657 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
658 elements.append(Spacer(1, 32))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
659
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
660 # Create the GENOME TRAITS table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
661 genome_traits_table = Table(genome_traits_table_data)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
662
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
663 # Style the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
664 genome_traits_table.setStyle(TableStyle([
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
665 ('BACKGROUND', (0, 0), (0, -1), '#e7e7e7'),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
666 ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
667 ('FONTNAME', (0, 0), (-1, -1), 'Courier'),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
668 ('FONTSIZE', (0, 0), (-1, -1), 12),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
669 ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
670 ("GRID", (0, 0), (-1, -1), 0.5, colors.black)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
671 ]))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
672
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
673 # Add GENOME TRAITS table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
674 elements.append(genome_traits_table)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
675
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
676 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
677 elements.append(Spacer(1, 28))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
678
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
679 # Add EBP METRICS SECTION subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
680 subtitle = Paragraph("EBP metrics summary and curation notes", styles['subTitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
681 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
682
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
683 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
684 elements.append(Spacer(1, 24))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
685
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
686 # Iterate over haplotypes in the Curated category to get data for EBP metrics
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
687 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {})
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
688 haplotype_names = list(curated_assemblies.keys())
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
689
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
690 for haplotype in haplotype_names:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
691 properties = curated_assemblies[haplotype]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
692 if 'gfastats--nstar-report_txt' in properties and 'merqury_qv' in properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
693 gfastats_path = properties['gfastats--nstar-report_txt']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
694 order = haplotype_names.index(haplotype) # Determine the order based on the position of the haplotype in the list
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
695 qv_value = get_qv_value(properties['merqury_qv'], order, 'Curated', haplotype)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
696
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
697 ebp_quality_metric = compute_ebp_metric(haplotype, gfastats_path, qv_value)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
698 EBP_metric_paragraph = Paragraph(ebp_quality_metric, styles["midiStyle"])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
699
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
700 # Add the EBP quality metric paragraph to elements
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
701 elements.append(EBP_metric_paragraph)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
702
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
703 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
704 elements.append(Spacer(1, 8))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
705
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
706 # Add sentence
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
707 Textline = Paragraph("The following metrics were automatically flagged as below EBP recommended standards or different from expected:", styles['midiStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
708 elements.append(Textline)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
709
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
710 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
711 elements.append(Spacer(1, 4))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
712
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
713 # Apply checks and add warning paragraphs to elements
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
714 elements += generate_warning_paragraphs(genome_haploid_length, max_total_bp, "Haploid size (bp)")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
715 elements += generate_warning_paragraphs(haploid_number, obs_haploid_num, "Haploid Number")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
716 elements += generate_warning_paragraphs(proposed_ploidy, ploidy, "Ploidy")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
717 elements += generate_warning_paragraphs(sex, obs_sex, "Sample Sex")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
718
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
719 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
720 elements.append(Spacer(1, 4))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
721
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
722 # Iterate over haplotypes in the Curated category and apply checks
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
723 for haplotype in haplotype_names:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
724 properties = curated_assemblies[haplotype]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
725 if isinstance(properties, dict) and 'merqury_qv' in properties and 'merqury_completeness_stats' in properties and 'busco_short_summary_txt' in properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
726 order = haplotype_names.index(haplotype)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
727 qv_value = get_qv_value(properties['merqury_qv'], order, "Curated", haplotype)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
728 completeness_value = get_completeness_value(properties['merqury_completeness_stats'], order, "Curated", haplotype)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
729 busco_scores = extract_busco_values(properties['busco_short_summary_txt'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
730
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
731 warnings = generate_curated_warnings(haplotype, qv_value, completeness_value, busco_scores)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
732 elements += warnings
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
733
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
734 assembly_warnings = generate_assembly_warnings(asm_data, gaps_per_gbp_data, obs_haploid_num)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
735 elements.extend(assembly_warnings)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
736
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
737 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
738 elements.append(Spacer(1, 24))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
739
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
740 # Add small subtitle for Curator notes
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
741 subtitle = Paragraph("Curator notes", styles['normalStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
742 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
743
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
744 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
745 elements.append(Spacer(1, 8))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
746
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
747 # Curator notes
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
748 curator_notes_paragraph = Paragraph(curator_notes_text, styles["midiStyle"])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
749 elements.append(curator_notes_paragraph)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
750
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
751 # Page break
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
752 elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
753
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
754 # PDF SECTION 2 -------------------------------------------------------------------------------
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
755
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
756 # Add quality metrics section subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
757 subtitle = Paragraph("Quality metrics table", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
758 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
759
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
760 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
761 elements.append(Spacer(1, 48))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
762
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
763 # create QUALITY METRICS table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
764 asm_table = Table(asm_table_data)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
765
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
766 # Style the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
767 asm_table.setStyle(TableStyle([
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
768 ('BACKGROUND', (0, 0), (-1, 0), '#e7e7e7'), # grey background for the header
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
769 ('ALIGN', (0, 0), (-1, -1), 'CENTER'), # center alignment
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
770 ('FONTNAME', (0, 0), (-1, -1), 'Courier'), # bold font for the header
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
771 ('FONTSIZE', (0, 0), (-1, -1), 11), # font size
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
772 ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
773 ("GRID", (0, 0), (-1, -1), 0.5, colors.black)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
774 ]))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
775
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
776 # Add QUALITY METRICS table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
777 elements.append(asm_table)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
778
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
779 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
780 elements.append(Spacer(1, 5))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
781
2
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
782 # Store BUSCO information from each file in a list
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
783 busco_info_list = []
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
784 for asm_stages, stage_properties in asm_data.items():
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
785 for i, haplotype_properties in stage_properties.items():
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
786 if isinstance(haplotype_properties, dict):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
787 if 'busco_short_summary_txt' in haplotype_properties:
2
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
788 busco_info = extract_busco_info(haplotype_properties['busco_short_summary_txt'])
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
789 if all(busco_info):
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
790 busco_info_list.append(busco_info)
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
791
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
792 # Function to format BUSCO information
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
def format_busco_info(info):
    """Render one BUSCO run description as a single report line.

    Args:
        info: A 4-item sequence ``(version, (lineage, genomes, buscos),
            mode, predictor)``. The second item is itself a 3-item
            sequence describing the lineage dataset.

    Returns:
        A string of the form
        ``BUSCO: <version> (<mode>, <predictor>) / Lineage: <lineage>
        (genomes:<genomes>, BUSCOs:<buscos>)`` on one line.

    Raises:
        ValueError: If ``info`` or its second item does not have the
            expected number of items (raised by tuple unpacking).
        TypeError: If ``info`` or its second item is not iterable.
    """
    # Nested unpacking doubles as a shape check on the incoming record.
    version, (lineage, genomes, buscos), mode, predictor = info
    return f"BUSCO: {version} ({mode}, {predictor}) / Lineage: {lineage} (genomes:{genomes}, BUSCOs:{buscos})"
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
796
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
797 # Checking if all elements in the list are identical
2
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
798 if busco_info_list and all(info == busco_info_list[0] for info in busco_info_list):
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
799 busco_text = format_busco_info(busco_info_list[0])
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
800 elements.append(Paragraph(busco_text, styles['miniStyle']))
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
801 else:
2
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
802 elements.append(Paragraph("Warning! BUSCO versions or lineage datasets are not the same across results:", styles['miniStyle']))
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
803 logging.warning("WARNING: BUSCO versions or lineage datasets are not the same across results")
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
804 for info in busco_info_list:
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
805 busco_text = format_busco_info(info)
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
806 elements.append(Paragraph(busco_text, styles['miniStyle']))
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
807
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
808 # Page break
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
809 elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
810
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
811 # PDF SECTION 3 -------------------------------------------------------------------------------
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
812
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
813 # Add hic maps section subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
814 subtitle = Paragraph("HiC contact map of curated assembly", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
815 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
816
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
817 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
818 elements.append(Spacer(1, 36))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
819
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
820 # Initialize counter
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
821 tool_count = 0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
822
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
823 # Add title and images for each step
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
824 for asm_stages, stage_properties in asm_data.items():
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
825 if asm_stages == 'Curated':
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
826 tool_elements = list(stage_properties.keys())
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
827
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
828 images_with_names = []
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
829
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
830 for haplotype in tool_elements:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
831 haplotype_properties = stage_properties[haplotype]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
832
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
833 # Check if there is an image and/or a link
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
834 png_file = haplotype_properties.get('hic_FullMap_png', '')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
835 link = haplotype_properties.get('hic_FullMap_link', '')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
836
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
837 # Prepare paragraphs for the image and link
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
838 if png_file:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
839 # Create image object
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
840 img = Image(png_file, width=11 * cm, height=11 * cm)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
841 images_with_names.append([img])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
842 else:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
843 # Add paragraph for missing image
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
844 missing_png_paragraph = Paragraph(f"<b>{haplotype}</b> HiC PNG is missing!", styles["midiStyle"])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
845 images_with_names.append([missing_png_paragraph])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
846
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
847 # Add paragraph for the link
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
848 if link:
2
0efed25f6d38 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
bgruening
parents: 1
diff changeset
849 link_html = f'<b>{haplotype}</b> <link href="{link}" color="blue">[non-permanent LINK]</link>'
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
850 else:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
851 link_html = f'<b>{haplotype}</b> File link is missing!'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
852
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
853 link_paragraph = Paragraph(link_html, styles["midiStyle"])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
854 images_with_names.append([link_paragraph])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
855
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
856 # Append a spacer after every haplotype except the last one
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
857 if len(tool_elements) > 1 and tool_elements.index(haplotype) < len(tool_elements) - 1:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
858 images_with_names.append([Spacer(1, 12)])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
859
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
860 # Add images and names to the elements in pairs
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
861 for i in range(0, len(images_with_names), 4): # Process two images (and their names) at a time
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
862 elements_to_add = images_with_names[i: i + 4]
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
863
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
864 # Create table for the images and names
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
865 table = Table(elements_to_add)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
866 table.hAlign = 'CENTER'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
867 elements.append(table)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
868
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
869 # Add a page break conditionally
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
870 next_elements_start = i + 4
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
871 if next_elements_start < len(images_with_names):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
872 if len(images_with_names[next_elements_start]) > 0 and isinstance(images_with_names[next_elements_start][0], Image):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
873 elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
874
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
875 tool_count += 1
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
876
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
877 elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
878
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
# PDF SECTION 4 -------------------------------------------------------------------------------
# K-mer spectra of the curated assembly: gathers the Merqury PNG paths declared
# under ASSEMBLIES -> Curated in the YAML, places every existing image in a
# two-column table with a caption below it, and finishes the page.
# NOTE(review): re-indented from a whitespace-stripped listing; the nesting is
# reconstructed from statement order - confirm against the original file.

# Add kmer spectra section subtitle
subtitle = Paragraph("K-mer spectra of curated assembly", styles['TitleStyle'])
elements.append(subtitle)

# Spacer
elements.append(Spacer(1, 48))

# Initialize counter
counter = 0

# Iterate over haplotypes in the Curated category to get K-mer spectra images.
# `or {}` also covers keys that are present but empty (None) in the YAML.
curated_assemblies = (yaml_data.get('ASSEMBLIES') or {}).get('Curated') or {}

# Get paths for spectra files
spectra_files = {'common': {}}
for assembly_type, assembly_data in curated_assemblies.items():
    if not isinstance(assembly_data, dict):
        # Entries that are not mappings cannot carry spectra paths
        continue
    if 'merqury_hap_spectra_cn_png' in assembly_data:
        spectra_files[assembly_type] = {'spectra_cn_png': assembly_data['merqury_hap_spectra_cn_png']}
    if 'merqury_spectra_cn_png' in assembly_data:
        spectra_files['common']['spectra_cn_png'] = assembly_data['merqury_spectra_cn_png']
    if 'merqury_spectra_asm_png' in assembly_data:
        spectra_files['common']['spectra_asm_png'] = assembly_data['merqury_spectra_asm_png']

# Determine the number of spectra-cn files and assign unique names if needed.
# dict.fromkeys removes duplicates while keeping first-seen order, so the
# selection below is reproducible from run to run (a set is not ordered).
spectra_cn_files = list(dict.fromkeys(
    file_dict['spectra_cn_png']
    for file_dict in spectra_files.values()
    if file_dict.get('spectra_cn_png')
))

# Exactly three distinct spectra-cn images is taken to mean one combined
# (diploid) plot plus one plot per haplotype.
has_haplotype_spectra = len(spectra_cn_files) == 3

if has_haplotype_spectra:
    # The file with the shortest basename is treated as the combined plot
    shortest_spectra_cn_file = min(spectra_cn_files, key=lambda f: len(os.path.basename(f)))
    similar_files = [f for f in spectra_cn_files if f != shortest_spectra_cn_file]
    if similar_files:
        # NOTE(review): these names are not read anywhere in the visible part of the function
        unique_name1, unique_name2 = find_unique_parts(os.path.basename(similar_files[0]), os.path.basename(similar_files[1]))
else:
    shortest_spectra_cn_file = spectra_cn_files[0] if spectra_cn_files else None

# Create image objects and add a caption below each image
images = []
for label, file_dict in spectra_files.items():
    for key, png_file in file_dict.items():
        # Paths that are unset or do not exist on disk are skipped silently
        if not (png_file and os.path.exists(png_file)):
            continue
        try:
            image = Image(png_file, width=8.4 * cm, height=7 * cm)
            filename = os.path.basename(png_file)

            if filename.endswith("spectra-asm.ln.png"):
                text = "Distribution of k-mer counts coloured by their presence in reads/assemblies"
            elif filename.endswith("spectra-cn.ln.png"):
                if has_haplotype_spectra:
                    if png_file == shortest_spectra_cn_file:
                        text = "Distribution of k-mer counts per copy numbers found in asm (dipl.)"
                    else:
                        text = f"Distribution of k-mer counts per copy numbers found in {label} (hapl.)"
                else:
                    text = "Distribution of k-mer counts per copy numbers found in asm"
            else:
                # Unrecognised file name: fall back to the bare file name as caption
                text = filename

            images.append([image, Paragraph(text, styles["midiStyle"])])
        except Exception as e:
            # Best effort: one broken image must not abort the whole report
            logging.error(f"Error processing image {png_file}: {str(e)}")

# Create the table with dynamic size
if images:
    num_columns = 2
    num_rows = (len(images) + 1) // 2  # round up so an odd image count gets its own row
    image_table_data = [
        [
            images[row * num_columns + col] if row * num_columns + col < len(images) else []
            for col in range(num_columns)
        ]
        for row in range(num_rows)
    ]
    image_table = Table(image_table_data)

    # Style the table
    table_style = TableStyle([
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 20),  # 20 here is a spacer between rows
    ])

    image_table.setStyle(table_style)
    elements.append(image_table)
else:
    elements.append(Paragraph("No K-mer spectra images available.", styles["midiStyle"]))

# Increase counter by the number of PNGs added
counter += len(images)

# If counter is a multiple of 4 (including 0), insert the page break before the spacer
if counter % 4 == 0:
    elements.append(PageBreak())

# Add spacer
elements.append(Spacer(1, 12))

# Otherwise the page break goes after the spacer, so exactly one break is always emitted
if counter % 4 != 0:
    elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
978
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
# PDF SECTION 5 -------------------------------------------------------------------------------
# Post-curation contamination screening: one page per Curated haplotype that
# declares a 'blobplot_cont_png' entry, showing the blobplot image with a
# caption, or a "missing" notice when the image cannot be shown.
# NOTE(review): re-indented from a whitespace-stripped listing; the nesting is
# reconstructed from statement order - confirm against the original file.

# Add contamination section subtitle
subtitle = Paragraph("Post-curation contamination screening", styles['TitleStyle'])
elements.append(subtitle)

# Spacer
elements.append(Spacer(1, 36))

# Initialize counter
tool_count = 0

# Add title and images for each step
for asm_stages, stage_properties in asm_data.items():
    # Only the 'Curated' stage is reported; an empty stage has nothing to iterate
    if asm_stages != 'Curated' or not isinstance(stage_properties, dict):
        continue

    for haplotype, haplotype_properties in stage_properties.items():
        if not (isinstance(haplotype_properties, dict) and 'blobplot_cont_png' in haplotype_properties):
            continue

        # Get image path
        png_file = haplotype_properties['blobplot_cont_png']

        # Display the image only when a path is set AND the file is on disk.
        # ReportLab opens image files lazily by default, so a stale path would
        # otherwise fail only when the document is built and abort the report.
        if png_file and os.path.exists(png_file):
            # Create image object
            img = Image(png_file, width=20 * cm, height=20 * cm)
            elements.append(img)

            # Caption: haplotype name followed by a description of the plot
            blob_text = f"<b>{haplotype}.</b> Bubble plot circles are scaled by sequence length, positioned by coverage and GC proportion, and coloured by taxonomy. Histograms show total assembly length distribution on each axis."
            blob_paragraph = Paragraph(blob_text, styles["midiStyle"])
            elements.append(blob_paragraph)
        else:
            if png_file:
                logging.warning(f"Blobplot image for {haplotype} not found: {png_file}")
            # Add paragraph for missing image
            missing_png_paragraph = Paragraph(f"<b>{haplotype}</b> PNG is missing!", styles["midiStyle"])
            elements.append(missing_png_paragraph)

        # Add a page break after each image and its description
        elements.append(PageBreak())

        tool_count += 1
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1021
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
# SECTION 6 -----------------------------------------------------------------------------------
# Data profile: section heading, a spacer, then `table_data` rendered as a
# gridded table whose first column has a grey background.

# Heading followed by vertical space
subtitle = Paragraph("Data profile", styles['TitleStyle'])
elements.append(subtitle)
elements.append(Spacer(1, 24))

# Cell ranges as (top-left, bottom-right) pairs; -1 addresses the last row/column
all_cells = ((0, 0), (-1, -1))
first_column = ((0, 0), (0, -1))

# Look of the DATA PROFILE table
data_profile_style = TableStyle([
    ('BACKGROUND', *first_column, '#e7e7e7'),  # grey background for the first column
    ('ALIGN', *all_cells, 'CENTER'),  # centre every cell
    ('FONTNAME', *all_cells, 'Courier'),  # regular (non-bold) Courier everywhere
    ('FONTSIZE', *all_cells, 12),  # same font size for every cell
    ('BOTTOMPADDING', *all_cells, 8),
    ("GRID", *all_cells, 0.5, colors.black),
])

# Build the DATA PROFILE table, style it and add it to the document
data_table = Table(table_data)
data_table.setStyle(data_profile_style)
elements.append(data_table)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1046
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1047 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1048 elements.append(Spacer(1, 32))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1049
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1050 # Add assembly pipeline section subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1051 subtitle = Paragraph("Assembly pipeline", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1052 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1053
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1054 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1055 elements.append(Spacer(1, 24))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1056
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1057 # Add ASM PIPELINE tree
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1058 elements.append(Paragraph(asm_pipeline_tree, styles['treeStyle']))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1059
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1060 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1061 elements.append(Spacer(1, 32))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1062
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1063 # Add curation pipeline section subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1064 subtitle = Paragraph("Curation pipeline", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1065 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1066
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1067 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1068 elements.append(Spacer(1, 24))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1069
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1070 # Add CURATION PIPELINE tree
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1071 elements.append(Paragraph(curation_pipeline_tree, styles['treeStyle']))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1072
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1073 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1074 elements.append(Spacer(1, 48))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1075
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1076 # Add submitter, affiliation
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1077 submitter_paragraph_style = ParagraphStyle(name='SubmitterStyle', fontName='Courier', fontSize=10)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1078 elements.append(Paragraph(f"Submitter: {submitter}", submitter_paragraph_style))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1079 elements.append(Paragraph(f"Affiliation: {affiliation}", submitter_paragraph_style))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1080
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1081 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1082 elements.append(Spacer(1, 8))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1083
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1084 # Add the date and time (CET) of the document creation
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1085 cet = pytz.timezone("CET")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1086 current_datetime = datetime.now(cet)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1087 formatted_datetime = current_datetime.strftime("%Y-%m-%d %H:%M:%S %Z")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1088 elements.append(Paragraph(f"Date and time: {formatted_datetime}", submitter_paragraph_style))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1089
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1090 # Build the PDF ###############################################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1091 pdf.build(elements)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1092
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1093
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: a single positional argument gives the
    # location of the YAML file, which is then passed on to make_report().
    parser = argparse.ArgumentParser(
        description=(
            'Create an ERGA Assembly Report (EAR) from a YAML file. '
            'Visit https://github.com/ERGA-consortium/EARs for more information'
        ),
    )
    parser.add_argument(
        'yaml_file',
        type=str,
        help='Path to the YAML file',
    )
    args = parser.parse_args()

    # Hand the user-supplied path to the report routine.
    make_report(args.yaml_file)