annotate make_EAR.py @ 1:82450f7907ef draft

planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
author bgruening
date Fri, 30 Aug 2024 09:27:17 +0000
parents 6af76d4371f8
children 0efed25f6d38
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
2 import argparse
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
3 import logging
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
4 import math
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
5 import os
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
6 import re
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
7 import sys
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
8 from datetime import datetime
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
9
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
10 import pytz
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
11 import requests
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
12 import yaml
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
13 from reportlab.lib import colors
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
14 from reportlab.lib.pagesizes import A4
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
15 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
16 from reportlab.lib.units import cm
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
17 from reportlab.platypus import Image, PageBreak, Paragraph, SimpleDocTemplate, Spacer, Table, TableStyle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
18
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
19
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
20 # make_EAR_glxy.py
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
21 # CAUTION: This is for the Galaxy version!
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
22 # by Diego De Panis
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
23 # ERGA Sequencing and Assembly Committee
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
24 EAR_version = "v24.08.26"
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
25
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
26
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
27 def make_report(yaml_file):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
28 logging.basicConfig(filename='EAR.log', level=logging.INFO)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
29 # Read the content from EAR.yaml file
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
30 with open(yaml_file, "r") as file:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
31 yaml_data = yaml.safe_load(file)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
32
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
33 # FUNCTIONS ###################################################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
34
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def format_number(value):
    """Format a numeric value with thousands separators.

    Values that convert to a whole number are rendered as integers
    (``"1234567"`` -> ``"1,234,567"``); other numbers keep their fractional
    part (``"1234.5"`` -> ``"1,234.5"``).

    Args:
        value: A number, or a string holding one.

    Returns:
        The formatted string, or ``value`` itself, unchanged, when it cannot
        be converted to a float (non-numeric text, ``None``, containers...).
    """
    try:
        value_float = float(value)
    except (TypeError, ValueError):
        # ValueError: text that is not a number.
        # TypeError: None and other non-string, non-numeric inputs; without
        # it those would escape the "return the original value" fallback.
        return value
    if value_float.is_integer():
        # format as an integer if no decimal part
        return f'{int(value_float):,}'
    # format as a float
    return f'{value_float:,}'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
47
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
48 # extract gfastats values
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def extract_gfastats_values(content, keys):
    """Pull the values of the requested fields out of a block of text.

    For every key, the first ``"<key>: <value>"`` occurrence in ``content``
    is used; the value is the remainder of that line.

    Args:
        content: Text to search (callers in this file pass the contents of
            a gfastats report).
        keys: Field labels to look up. They are matched literally.

    Returns:
        A list of value strings, in the same order as ``keys``.

    Raises:
        IndexError: If a key has no ``"<key>: <value>"`` line in ``content``.
    """
    values = []
    for key in keys:
        # Escape the label so characters such as ".", "(" or "+" in a key are
        # matched literally instead of being interpreted as regex syntax.
        match = re.search(f"{re.escape(key)}: (.+)", content)
        if match is None:
            # Same exception type as indexing an empty findall() result, but
            # with a message that says which field is missing.
            raise IndexError(f"'{key}' not found in gfastats content")
        values.append(match.group(1))
    return values
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
51
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
# Field labels as they are spelled in the gfastats report text
keys = [
    "Total scaffold length",
    "GC content %",
    "# gaps in scaffolds",
    "Total gap length in scaffolds",
    "# scaffolds",
    "Scaffold N50",
    "Scaffold L50",
    "Scaffold L90",
    "# contigs",
    "Contig N50",
    "Contig L50",
    "Contig L90",
]

# Shorter labels for some fields; any key not listed here keeps its own name
display_name_overrides = {
    "Total scaffold length": "Total bp",
    "GC content %": "GC %",
    "Total gap length in scaffolds": "Total gap bp",
    "# scaffolds": "Scaffolds",
    "# contigs": "Contigs",
}
display_names = [display_name_overrides.get(key, key) for key in keys]

# Positions of two fields within keys (and therefore within display_names)
total_length_index = keys.index("Total scaffold length")
gaps_index = keys.index("# gaps in scaffolds")

exclusion_list = ["# gaps in scaffolds"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
77
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
78 # extract Total bp from gfastats report
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def extract_total_bp_from_gfastats(gfastats_path):
    """Return the total scaffold length from a gfastats report, comma-grouped.

    Args:
        gfastats_path: Path of the text report to read.

    Returns:
        The integer found on the ``Total scaffold length:`` line, formatted
        with thousands separators (e.g. ``"1,234,567"``).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the report has no ``Total scaffold length:`` line, or
            the value on that line is not an integer.
    """
    with open(gfastats_path, "r") as f:
        content = f.read()
    match = re.search(r"Total scaffold length: (.+)", content)
    if match is None:
        # Fail with a message that names the file instead of the opaque
        # AttributeError produced by calling .group() on None.
        raise ValueError(f"'Total scaffold length' not found in {gfastats_path}")
    # The value may already carry thousands separators; drop them to parse.
    total_bp = int(match.group(1).replace(',', ''))
    return "{:,}".format(total_bp)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
85
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
86 # compute EBP quality metric
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def compute_ebp_metric(haplotype, gfastats_path, qv_value):
    """Build the "Obtained EBP quality metric" sentence for one haplotype.

    The metric has the form ``<c>.<s>.Q<q>``: ``c`` and ``s`` are the floored
    base-10 logarithms of the "Contig N50" and "Scaffold N50" values read
    from the report at ``gfastats_path``, and ``q`` is ``qv_value`` converted
    to float and floored.

    Args:
        haplotype: Label inserted into the returned sentence.
        gfastats_path: Path of the report holding the two N50 fields.
        qv_value: QV as a number or numeric string.

    Returns:
        The complete sentence as a string.
    """
    with open(gfastats_path, "r") as report:
        report_text = report.read()

    n50_fields = extract_gfastats_values(report_text, ["Contig N50", "Scaffold N50"])

    def floored_log10(number_text):
        # values may contain thousands separators, e.g. "1,234,567"
        return math.floor(math.log10(int(number_text.replace(',', ''))))

    contig_n50_log = floored_log10(n50_fields[0])
    scaffold_n50_log = floored_log10(n50_fields[1])

    return f"Obtained EBP quality metric for {haplotype}: {contig_n50_log}.{scaffold_n50_log}.Q{math.floor(float(qv_value))}"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
98
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
99 # extract qv values
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def get_qv_value(file_path, order, tool, haplotype):
    """Read a QV value from a tab-separated table.

    The value is the fourth column of row ``order``. It is only returned
    when the table has exactly one row, or when the first column of its
    third row is ``Both``.
    NOTE(review): this looks like the layout of a QV table with one row per
    assembly plus a combined row - confirm against the producing tool.

    Args:
        file_path: Path of the table to read.
        order: 0-based index of the row to take the value from.
        tool: Tool name; only used in the error log message.
        haplotype: Haplotype name; only used in the error log message.

    Returns:
        The fourth column of the selected row as a string, or ``''`` when
        the table does not have the expected layout or cannot be read
        (read/parse failures are logged as errors).
    """
    try:
        with open(file_path, 'r') as file:
            lines = file.readlines()
        if len(lines) > order:
            single_row = len(lines) == 1
            # Check the length before touching lines[2]: a two-row table has
            # no third row, and indexing it would raise IndexError and be
            # logged as a read error although the file was read fine.
            has_both_row = len(lines) > 2 and lines[2].split('\t')[0].strip() == "Both"
            if single_row or has_both_row:
                target_line = lines[order]
                return target_line.split('\t')[3]
    except Exception as e:
        # Best effort: a missing or malformed file must not abort the report.
        logging.error(f"Error reading {file_path} for tool {tool} and haplotype {haplotype}: {str(e)}")
    return ''
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
111
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
112 # extract Kmer completeness values
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def get_completeness_value(file_path, order, tool, haplotype):
    """Read the fifth column of one row from a tab-separated table.

    Args:
        file_path: Path of the table to read.
        order: 0-based index of the row to take the value from.
        tool: Tool name; only used in the error log message.
        haplotype: Haplotype name; only used in the error log message.

    Returns:
        The fifth column of row ``order`` with surrounding whitespace
        removed, or ``''`` when the table has no such row or reading/parsing
        fails (failures are logged as errors).
    """
    try:
        with open(file_path, 'r') as table:
            rows = table.readlines()
        if len(rows) > order:
            columns = rows[order].split('\t')
            return columns[4].strip()
    except Exception as e:
        # best effort: log the problem and fall back to an empty value
        logging.error(f"Error reading {file_path} for tool {tool} and haplotype {haplotype}: {str(e)}")
    return ''
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
124
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
125 # get unique part in file names
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def find_unique_parts(file1, file2):
    """Return the dot-separated pieces each name does not share with the other.

    Both names are split on ``.``; for each name, the pieces that do not
    occur anywhere in the other name are kept in their original order and
    joined with single spaces.

    Args:
        file1: First file name.
        file2: Second file name.

    Returns:
        A ``(unique_to_file1, unique_to_file2)`` tuple of strings; either
        element is ``''`` when that name has no unique piece.
    """
    first_pieces = file1.split('.')
    second_pieces = file2.split('.')

    def only_in(own, other):
        # keep order and duplicates of the pieces that the other name lacks
        return ' '.join(piece for piece in own if piece not in other)

    return only_in(first_pieces, second_pieces), only_in(second_pieces, first_pieces)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
134
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
135 # extract BUSCO values
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def extract_busco_values(file_path):
    """Extract the S, D, F and M percentages from a BUSCO summary file.

    The first ``C:...n:<digits>`` span found in the file is taken as the
    results line, and the percentage following each of ``S:``, ``D:``,
    ``F:`` and ``M:`` is read from it. Percentages may be written with or
    without a fractional part (``97.0%`` or ``97%``).

    Args:
        file_path: Path of the summary file to read.

    Returns:
        A ``(s_value, d_value, f_value, m_value)`` tuple of strings that
        include the ``%`` sign, or four empty strings when the file cannot
        be read or does not contain all four values (a warning is logged).
    """
    # Fractional part is optional so that rounded summaries are not dropped.
    percent = r"(\d+(?:\.\d+)?%)"
    try:
        with open(file_path, 'r') as file:
            content = file.read()
        results_line = re.findall(r"C:.*n:\d+", content)[0]
        s_value = re.findall(f"S:{percent}", results_line)[0]
        d_value = re.findall(f"D:{percent}", results_line)[0]
        f_value = re.findall(f"F:{percent}", results_line)[0]
        m_value = re.findall(f"M:{percent}", results_line)[0]
        return s_value, d_value, f_value, m_value
    except Exception as e:
        # Best effort: a missing or unparsable summary leaves the cells empty.
        logging.warning(f"Error reading {file_path}: {str(e)}")
    return '', '', '', ''
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
149
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
150 # extract BUSCO info
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def extract_busco_info(file_path):
    """Read a BUSCO report and pull out the BUSCO version and lineage details.

    Args:
        file_path: Path of the text file to scan.

    Returns:
        A ``(busco_version, lineage_info)`` tuple. ``busco_version`` is the
        string captured after "# BUSCO version is: ", or ``None`` when that
        line is absent. ``lineage_info`` is a 3-tuple of strings
        ``(lineage name, genome-or-species count, BUSCO count)``, or ``None``
        when no lineage line matches.

    Any exception (for example an unreadable file) is logged as a warning and
    whatever was collected up to that point is returned.
    """
    # The lineage line has been seen with either "genomes" or "species" as the
    # counted unit; the "genomes" wording is tried first.
    lineage_patterns = (
        r"The lineage dataset is: (.*?) \(Creation date:.*?, number of genomes: (\d+), number of BUSCOs: (\d+)\)",
        r"The lineage dataset is: (.*?) \(Creation date:.*?, number of species: (\d+), number of BUSCOs: (\d+)\)",
    )

    busco_version = None
    lineage_info = None

    try:
        with open(file_path, 'r') as handle:
            report_text = handle.read()

        found_version = re.search(r"# BUSCO version is: ([\d.]+)", report_text)
        if found_version is not None:
            busco_version = found_version.group(1)

        for pattern in lineage_patterns:
            found_lineage = re.search(pattern, report_text)
            if found_lineage is not None:
                lineage_info = found_lineage.groups()
                break
    except Exception as e:
        logging.warning(f"Error reading {file_path}: {str(e)}")

    return busco_version, lineage_info
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
173
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
174 # Function to check and generate warning messages
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def generate_warning_paragraphs(expected, observed, trait):
    """Compare an expected and an observed value and build warning paragraphs.

    Args:
        expected: Expected value, as text.
        observed: Observed value, as text.
        trait: Which comparison to run. Recognised values are
            "Haploid size (bp)" (integers, commas allowed; warns when the
            relative difference exceeds 20%), "Haploid Number" and "Ploidy"
            (integers; warns when they differ) and "Sample Sex" (warns when
            the trimmed, lower-cased strings differ). Any other trait yields
            no warning.

    Returns:
        A list holding zero or one ``Paragraph`` built with
        ``styles["midiStyle"]`` (both names come from outside this function).

    Any exception raised while parsing or comparing is logged as a warning and
    the list collected so far is returned.
    """
    paragraphs = []
    try:
        warning_text = None

        if trait == "Haploid size (bp)":
            expected_bp = int(expected.replace(',', ''))
            observed_bp = int(observed.replace(',', ''))
            relative_diff = abs(expected_bp - observed_bp) / expected_bp
            if relative_diff > 0.20:
                warning_text = f". Observed {trait} has >20% difference with Expected"
        elif trait in ("Haploid Number", "Ploidy"):
            # Compare as integers so "2" and " 2" count as equal.
            if int(expected) != int(observed):
                warning_text = f". Observed {trait} is different from Expected"
        elif trait == "Sample Sex":
            # Ignore case and surrounding whitespace.
            if expected.strip().lower() != observed.strip().lower():
                warning_text = ". Observed sex is different from Sample sex"

        if warning_text is not None:
            paragraphs.append(Paragraph(warning_text, styles["midiStyle"]))
    except Exception as e:
        logging.warning(f"Error in generating warning for {trait}: {str(e)}")
    return paragraphs
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
199
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
200 # Generate warnings for curated haplotypes (qv, kcomp, busco)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def generate_curated_warnings(haplotype, qv_value, completeness_value, busco_scores):
    """Build warning paragraphs for one haplotype from QV, k-mer and BUSCO values.

    Args:
        haplotype: Label inserted into every warning message.
        qv_value: QV value; anything ``float()`` accepts.
        completeness_value: K-mer completeness; anything ``float()`` accepts.
        busco_scores: Sequence whose first two items are the BUSCO single-copy
            and duplicated percentages as strings; a trailing "%" is stripped.

    Returns:
        A list of ``Paragraph`` objects built with ``styles["midiStyle"]``
        (both names come from outside this function), one per failed check,
        in the order: QV < 40, completeness < 90, single copy < 90,
        duplicated > 5.

    All four values are parsed before any check runs, so one unparsable value
    suppresses every warning. Any exception is logged as a warning.
    """
    paragraphs = []
    try:
        qv = float(qv_value)
        kmer_completeness = float(completeness_value)
        busco_single = float(busco_scores[0].rstrip('%'))
        busco_duplicated = float(busco_scores[1].rstrip('%'))

        # (check failed?, message) pairs, in reporting order.
        checks = (
            (qv < 40, f". QV value is less than 40 for {haplotype}"),
            (kmer_completeness < 90, f". Kmer completeness value is less than 90 for {haplotype}"),
            (busco_single < 90, f". BUSCO single copy value is less than 90% for {haplotype}"),
            (busco_duplicated > 5, f". BUSCO duplicated value is more than 5% for {haplotype}"),
        )
        for failed, text in checks:
            if failed:
                paragraphs.append(Paragraph(text, styles["midiStyle"]))

    except Exception as e:
        logging.warning(f"Error in generating warnings for {haplotype}: {str(e)}")

    return paragraphs
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
234
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
235 # Generate warnings for curated haplotypes (loss, gaps, 90inChrom)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def generate_assembly_warnings(asm_data, gaps_per_gbp_data, obs_haploid_num):
    """Build warning paragraphs about length loss, gap density and L90.

    Iterates over ``asm_stages`` (defined outside this function) and, for each
    haplotype, checks the curated assembly against three criteria.

    Args:
        asm_data: Nested mapping indexed as
            ``asm_data[stage][haplotype]['gfastats--nstar-report_txt']`` for
            the 'Pre-curation' and 'Curated' stages; each value is passed to
            ``extract_total_bp_from_gfastats``.
        gaps_per_gbp_data: Mapping keyed by ``('Curated', haplotype)`` giving
            gaps per Gbp; a missing key counts as 0.
        obs_haploid_num: Observed haploid number; anything ``float()`` accepts.

    Returns:
        A list of ``Paragraph`` objects built with ``styles["midiStyle"]``,
        in haplotype order and, per haplotype, in the order: length loss > 3%,
        more than 1000 gaps/Gbp, scaffold L90 above the observed haploid
        number.
    """
    flagged = []

    def flag(text):
        flagged.append(Paragraph(text, styles["midiStyle"]))

    for haplotype in asm_stages:
        pre_curation_bp = extract_total_bp_from_gfastats(asm_data['Pre-curation'][haplotype]['gfastats--nstar-report_txt'])
        curated_bp = extract_total_bp_from_gfastats(asm_data['Curated'][haplotype]['gfastats--nstar-report_txt'])
        l90_text = gfastats_data[('Curated', haplotype)][display_names.index('Scaffold L90')]
        scaffold_l90 = float(l90_text.replace(',', ''))

        # Length lost during curation, as a percentage of the pre-curation
        # length; skipped when either total is empty/falsy.
        if pre_curation_bp and curated_bp:
            pre_len = float(pre_curation_bp.replace(',', ''))
            curated_len = float(curated_bp.replace(',', ''))
            loss_percentage = (pre_len - curated_len) / pre_len * 100
            if loss_percentage > 3:
                flag(f". Assembly length loss > 3% for {haplotype}")

        if gaps_per_gbp_data.get(('Curated', haplotype), 0) > 1000:
            flag(f". More than 1000 gaps/Gbp for {haplotype}")

        # An L90 above the haploid number is reported as "not 90% of the
        # assembly in chromosomes".
        if scaffold_l90 > float(obs_haploid_num):
            flag(f". Not 90% of assembly in chromosomes for {haplotype}")

    return flagged
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
261
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
262 # Parse pipeline and generate "tree"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
def generate_pipeline_tree(pipeline_data):
    """Render a tool -> "version/param/param..." mapping as an HTML-ish tree.

    Args:
        pipeline_data: Mapping of tool name to a value whose string form is
            ``version`` optionally followed by ``/``-separated key parameters.

    Returns:
        A single string with lines joined by ``<br/>``. Each tool contributes
        a "- <b>tool</b>" line, an indented "ver:" line and one indented
        "key param:" line per non-empty parameter (or a single "NA" line when
        there are none). If ``pipeline_data`` is not a dict, the string
        "Invalid pipeline data format" is returned.
    """
    if not isinstance(pipeline_data, dict):
        return "Invalid pipeline data format"

    # Four non-breaking spaces followed by the branch marker.
    branch = "&nbsp;" * 4 + "|_ "
    rows = []

    for tool, version_param in pipeline_data.items():
        version, *raw_params = str(version_param).split('/')
        # Empty segments (e.g. from a trailing "/") are not parameters.
        key_params = [p for p in raw_params if p] or ["NA"]

        rows.append(f"- <b>{tool}</b>")
        rows.append(f"{branch}<i>ver:</i> {version}")
        rows.extend(f"{branch}<i>key param:</i> {p}" for p in key_params)

    return "<br/>".join(rows)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
297
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
298 # Reading SAMPLE INFORMATION section from yaml ################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
299
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
300 # Check for required fields
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
301 required_fields = ["ToLID", "Species", "Sex", "Submitter", "Affiliation", "Tags"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
302 missing_fields = [field for field in required_fields if field not in yaml_data or not yaml_data[field]]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
303
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
304 if missing_fields:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
305 logging.error(f"# GENERAL INFORMATION section in the yaml file is missing or empty for the following information: {', '.join(missing_fields)}")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
306 sys.exit(1)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
307
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
308 # Check that "Species" field is a string
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
309 if not isinstance(yaml_data["Species"], str):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
310 logging.error(f"# GENERAL INFORMATION section in the yaml file contains incorrect data type for 'Species'. Expected 'str' but got '{type(yaml_data['Species']).__name__}'.")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
311 sys.exit(1)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
312
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
313 # Get data for Header, ToLID table and submitter
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
314 tol_id = yaml_data["ToLID"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
315 species = yaml_data["Species"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
316 sex = yaml_data["Sex"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
317 submitter = yaml_data["Submitter"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
318 affiliation = yaml_data["Affiliation"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
319 tags = yaml_data["Tags"]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
320
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
321 # Check if tag is valid
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
322 valid_tags = ["ERGA-BGE", "ERGA-Pilot", "ERGA-Community", "ERGA-testing"]
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
323 if tags not in valid_tags:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
324 tags += "[INVALID TAG]"
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
325 logging.warning("# SAMPLE INFORMATION section in the yaml file contains an invalid tag. Valid tags are ERGA-BGE, ERGA-Pilot and ERGA-Community.")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
326
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
327 # Get data from GoaT based on species name
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
328 # Percent-encode the species name (requests.utils.quote) to handle special characters and spaces
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
329 species_name = requests.utils.quote(species)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
330
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
331 # Query the GoaT search API for the taxon record matching the species name
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
332 goat_response = requests.get(f'https://goat.genomehubs.org/api/v2/search?query=tax_name%28{species_name}%29&result=taxon')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
333 goat_data = goat_response.json() # convert json to dict
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
334
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
335 taxon_number = goat_data['results'][0]['result']['taxon_id']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
336
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
337 goat_results = goat_data['results']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
338
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
339 class_name = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
340 order_name = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
341 haploid_number = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
342 haploid_source = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
343 ploidy = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
344 ploidy_source = 'NA'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
345
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
346 for result in goat_results:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
347 lineage = result['result']['lineage']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
348 for node in lineage:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
349 if node['taxon_rank'] == 'class':
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
350 class_name = node['scientific_name']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
351 if node['taxon_rank'] == 'order':
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
352 order_name = node['scientific_name']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
353
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
354 goat_second_response = requests.get(f'https://goat.genomehubs.org/api/v2/record?recordId={taxon_number}&result=taxon&taxonomy=ncbi')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
355 goat_second_data = goat_second_response.json()
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
356
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
357 ploidy_info = goat_second_data['records'][0]['record']['attributes']['ploidy']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
358
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
359 ploidy = ploidy_info['value']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
360 ploidy_source = ploidy_info['aggregation_source']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
361
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
362 haploid_info = goat_second_data['records'][0]['record']['attributes']['haploid_number']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
363
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
364 haploid_number = haploid_info['value']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
365 haploid_source = haploid_info['aggregation_source']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
366
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
367 sp_data = [
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
368 ["TxID", "ToLID", "Species", "Class", "Order"],
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
369 [taxon_number, tol_id, species, class_name, order_name]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
370 ]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
371
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
372 # Transpose the data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
373 transposed_sp_data = list(map(list, zip(*sp_data)))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
374
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
375 # Reading SEQUENCING DATA section from yaml ###################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
376
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
377 # get DATA section from yaml
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
378 data_list = yaml_data.get('DATA', [])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
379
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
380 # Prepare headers
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
381 headers = ['Data']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
382 data_values = ['Coverage']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
383
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
384 # Extract data from YAML and format it for the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
385 for item in data_list:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
386 for technology, coverage in item.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
387 headers.append(technology)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
388 data_values.append('NA' if not coverage else coverage)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
389
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
390 # Create a list of lists for the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
391 table_data = [headers, data_values]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
392
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
393 # Extract pipeline data
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
394 asm_pipeline_data = yaml_data.get('PIPELINES', {}).get('Assembly', {})
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
395 curation_pipeline_data = yaml_data.get('PIPELINES', {}).get('Curation', {})
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
396
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
397 # Build the tree diagrams for the Assembly and Curation pipelines
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
398 asm_pipeline_tree = generate_pipeline_tree(asm_pipeline_data)
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
399 curation_pipeline_tree = generate_pipeline_tree(curation_pipeline_data)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
400
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
401 # Reading GENOME PROFILING DATA section from yaml #############################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
402 profiling_data = yaml_data.get('PROFILING')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
403
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
404 # Check if profiling_data is available
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
405 if not profiling_data:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
406 logging.error('Error: No profiling data found in the YAML file.')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
407 sys.exit(1)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
408
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
409 # Check for GenomeScope data (mandatory)
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
410 genomescope_data = profiling_data.get('GenomeScope')
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
411 if not genomescope_data:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
412 logging.error("Error: GenomeScope data is missing in the YAML file. This is mandatory.")
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
413 sys.exit(1)
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
414
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
415 genomescope_summary = genomescope_data.get('genomescope_summary_txt')
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
416 if not genomescope_summary:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
417 logging.error("Error: GenomeScope summary file path is missing in the YAML file.")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
418 sys.exit(1)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
419
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
420 # Read the content of the GenomeScope summary file
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
421 try:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
422 with open(genomescope_summary, "r") as f:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
423 summary_txt = f.read()
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
424 # Extract values from summary.txt
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
425 genome_haploid_length = re.search(r"Genome Haploid Length\s+([\d,]+) bp", summary_txt).group(1)
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
426 proposed_ploidy = re.search(r"p = (\d+)", summary_txt).group(1)
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
427 except Exception as e:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
428 logging.error(f"Error reading GenomeScope summary file: {str(e)}")
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
429 sys.exit(1)
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
430
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
431 # Check for Smudgeplot data (optional)
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
432 smudgeplot_data = profiling_data.get('Smudgeplot')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
433 if smudgeplot_data:
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
434 smudgeplot_summary = smudgeplot_data.get('smudgeplot_verbose_summary_txt')
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
435 if smudgeplot_summary:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
436 try:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
437 with open(smudgeplot_summary, "r") as f:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
438 smud_summary_txt = f.readlines()
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
439 for line in smud_summary_txt:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
440 if line.startswith("* Proposed ploidy"):
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
441 proposed_ploidy = line.split(":")[1].strip()
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
442 break
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
443 except Exception as e:
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
444 logging.warning(f"Error reading Smudgeplot summary file: {str(e)}. Using GenomeScope ploidy.")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
445 else:
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
446 logging.warning("Smudgeplot summary file path is missing. Using GenomeScope ploidy.")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
447 else:
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
448 logging.info("Smudgeplot data not provided. Using GenomeScope ploidy.")
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
449
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
450 # Reading ASSEMBLY DATA section from yaml #####################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
451
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
452 asm_data = yaml_data.get('ASSEMBLIES', {})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
453
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
454 # collect the unique haplotype keys found across all assembly stages in asm_data (first-seen order)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
455 asm_stages = []
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
456 for asm_stage, stage_properties in asm_data.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
457 for haplotypes in stage_properties.keys():
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
458 if haplotypes not in asm_stages:
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
459 asm_stages.append(haplotypes)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
460
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
461 # get gfastats-based data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
462 gfastats_data = {}
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
463 for asm_stage, stage_properties in asm_data.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
464 for haplotypes, haplotype_properties in stage_properties.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
465 if isinstance(haplotype_properties, dict):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
466 if 'gfastats--nstar-report_txt' in haplotype_properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
467 file_path = haplotype_properties['gfastats--nstar-report_txt']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
468 with open(file_path, 'r') as file:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
469 content = file.read()
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
470 gfastats_data[(asm_stage, haplotypes)] = extract_gfastats_values(content, keys)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
471
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
472 gaps_per_gbp_data = {}
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
473 for (asm_stage, haplotypes), values in gfastats_data.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
474 try:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
475 gaps = float(values[gaps_index])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
476 total_length = float(values[total_length_index])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
477 gaps_per_gbp = round((gaps / total_length * 1_000_000_000), 2)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
478 gaps_per_gbp_data[(asm_stage, haplotypes)] = gaps_per_gbp
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
479 except (ValueError, ZeroDivisionError):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
480 gaps_per_gbp_data[(asm_stage, haplotypes)] = ''
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
481
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
482 # Define the contigging table (column names)
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
483 asm_table_data = [["Metrics"] + [f'{asm_stage} \n {haplotypes}' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
484
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
485 # Fill the table with the gfastats data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
486 for i in range(len(display_names)):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
487 metric = display_names[i]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
488 if metric not in exclusion_list:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
489 asm_table_data.append([metric] + [format_number(gfastats_data.get((asm_stage, haplotypes), [''])[i]) if (asm_stage, haplotypes) in gfastats_data else '' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
490
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
491 # Insert the Gaps/Gbp row into the table at index gaps_index + 1
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
492 asm_table_data.insert(gaps_index + 1, ['Gaps/Gbp'] + [format_number(gaps_per_gbp_data.get((asm_stage, haplotypes), '')) for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
493
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
494 # get QV, Kmer completeness and BUSCO data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
495 qv_data = {}
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
496 completeness_data = {}
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
497 busco_data = {metric: {} for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']}
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
498 for asm_stage, stage_properties in asm_data.items():
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
499 asm_stage_elements = list(stage_properties.keys())
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
500 for i, haplotypes in enumerate(asm_stage_elements):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
501 haplotype_properties = stage_properties[haplotypes]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
502 if isinstance(haplotype_properties, dict):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
503 if 'merqury_qv' in haplotype_properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
504 qv_data[(asm_stage, haplotypes)] = get_qv_value(haplotype_properties['merqury_qv'], i, asm_stage, haplotypes)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
505 if 'merqury_completeness_stats' in haplotype_properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
506 completeness_data[(asm_stage, haplotypes)] = get_completeness_value(haplotype_properties['merqury_completeness_stats'], i, asm_stage, haplotypes)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
507 if 'busco_short_summary_txt' in haplotype_properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
508 s_value, d_value, f_value, m_value = extract_busco_values(haplotype_properties['busco_short_summary_txt'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
509 busco_data['BUSCO sing.'].update({(asm_stage, haplotypes): s_value})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
510 busco_data['BUSCO dupl.'].update({(asm_stage, haplotypes): d_value})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
511 busco_data['BUSCO frag.'].update({(asm_stage, haplotypes): f_value})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
512 busco_data['BUSCO miss.'].update({(asm_stage, haplotypes): m_value})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
513
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
514 # Fill the table with the QV data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
515 asm_table_data.append(['QV'] + [qv_data.get((asm_stage, haplotypes), '') for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
516
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
517 # Fill the table with the Kmer completeness data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
518 asm_table_data.append(['Kmer compl.'] + [completeness_data.get((asm_stage, haplotypes), '') for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
519
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
520 # Fill the table with the BUSCO data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
521 for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
522 asm_table_data.append([metric] + [busco_data[metric].get((asm_stage, haplotypes), '') for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
523
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
524 # Reading CURATION NOTES section from yaml ####################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
525
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
526 obs_haploid_num = yaml_data.get("NOTES", {}).get("Obs_Haploid_num", "NA")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
527 obs_sex = yaml_data.get("NOTES", {}).get("Obs_Sex", "NA")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
528 interventions_per_gb = yaml_data.get("NOTES", {}).get("Interventions_per_Gb", "NA")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
529 contamination_notes = yaml_data.get("NOTES", {}).get("Contamination_notes", "NA")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
530 other_notes = yaml_data.get("NOTES", {}).get("Other_notes", "NA")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
531
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
532 # Extract Total bp for each haplotype and find the maximum
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
533 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
534 total_bp_values = []
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
535 for haplotype, properties in curated_assemblies.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
536 if 'gfastats--nstar-report_txt' in properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
537 total_bp = extract_total_bp_from_gfastats(properties['gfastats--nstar-report_txt'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
538 total_bp_values.append(total_bp)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
539
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
540 max_total_bp = max(total_bp_values, default='NA')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
541
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
542 # Create table data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
543 genome_traits_table_data = [
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
544 ["Genome Traits", "Expected", "Observed"],
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
545 ["Haploid size (bp)", genome_haploid_length, f"{max_total_bp}"],
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
546 ["Haploid Number", f"{haploid_number} (source: {haploid_source})", obs_haploid_num],
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
547 ["Ploidy", f"{ploidy} (source: {ploidy_source})", proposed_ploidy],
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
548 ["Sample Sex", sex, obs_sex]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
549 ]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
550
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
551 # Get curator notes
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
552 curator_notes_text = (
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
553 f". Interventions/Gb: {interventions_per_gb}<br/>"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
554 f". Contamination notes: &quot;{contamination_notes}&quot;<br/>"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
555 f". Other observations: &quot;{other_notes}&quot;"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
556 )
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
557
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
558 # PDF CONSTRUCTION ############################################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
559
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
560 # Set up the PDF file
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
561 pdf_filename = "EAR.pdf"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
562 margin = 0.5 * 72 # 0.5 inch in points (normal margin is 1 inch)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
563 pdf = SimpleDocTemplate(pdf_filename,
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
564 pagesize=A4,
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
565 leftMargin=margin,
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
566 rightMargin=margin,
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
567 topMargin=margin,
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
568 bottomMargin=margin)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
569 elements = []
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
570
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
571 # Set all the styles
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
572 styles = getSampleStyleSheet()
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
573 styles.add(ParagraphStyle(name='TitleStyle', fontName='Courier', fontSize=20))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
574 styles.add(ParagraphStyle(name='subTitleStyle', fontName='Courier', fontSize=16))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
575 styles.add(ParagraphStyle(name='normalStyle', fontName='Courier', fontSize=12))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
576 styles.add(ParagraphStyle(name='midiStyle', fontName='Courier', fontSize=10))
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
577 # styles.add(ParagraphStyle(name='LinkStyle', fontName='Courier', fontSize=10, textColor='blue', underline=True))
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
578 styles.add(ParagraphStyle(name='treeStyle', fontName='Courier', fontSize=10, leftIndent=12))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
579 styles.add(ParagraphStyle(name='miniStyle', fontName='Courier', fontSize=8))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
580 styles.add(ParagraphStyle(name='FileNameStyle', fontName='Courier', fontSize=6))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
581
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
582 # PDF SECTION 1 -------------------------------------------------------------------------------
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
583
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
584 # Add the title
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
585 title = Paragraph("ERGA Assembly Report", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
586 elements.append(title)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
587
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
588 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
589 elements.append(Spacer(1, 12))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
590
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
591 # Add version
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
592 ver_paragraph = Paragraph(EAR_version, styles['normalStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
593 elements.append(ver_paragraph)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
594
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
595 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
596 elements.append(Spacer(1, 12))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
597
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
598 # Add tags
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
599 tags_paragraph = Paragraph(f"Tags: {tags}", styles['normalStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
600 elements.append(tags_paragraph)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
601
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
602 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
603 elements.append(Spacer(1, 24))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
604
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
605 # Create the SPECIES DATA table with the transposed data
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
606 sp_data_table = Table(transposed_sp_data)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
607
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
608 # Style the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
609 sp_data_table.setStyle(TableStyle([
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
610 ("BACKGROUND", (0, 0), (0, -1), '#e7e7e7'), # Grey background for column 1
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
611 ("BACKGROUND", (1, 0), (1, -1), colors.white), # White background for column 2
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
612 ("ALIGN", (0, 0), (-1, -1), "CENTER"),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
613 ('FONTNAME', (0, 0), (0, 0), 'Courier'), # Regular font for row1, col1
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
614 ('FONTNAME', (1, 0), (1, 0), 'Courier'),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
615 ('FONTNAME', (0, 1), (-1, -1), 'Courier'), # Regular font for the rest of the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
616 ('FONTNAME', (1, 1), (1, 1), 'Courier-Bold'), # Bold font for row2, col2
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
617 ("FONTSIZE", (0, 0), (-1, -1), 14),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
618 ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
619 ("GRID", (0, 0), (-1, -1), 0.5, colors.black)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
620 ]))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
621
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
622 # Add SPECIES DATA table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
623 elements.append(sp_data_table)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
624
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
625 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
626 elements.append(Spacer(1, 32))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
627
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
628 # Create the GENOME TRAITS table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
629 genome_traits_table = Table(genome_traits_table_data)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
630
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
631 # Style the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
632 genome_traits_table.setStyle(TableStyle([
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
633 ('BACKGROUND', (0, 0), (0, -1), '#e7e7e7'),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
634 ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
635 ('FONTNAME', (0, 0), (-1, -1), 'Courier'),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
636 ('FONTSIZE', (0, 0), (-1, -1), 12),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
637 ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
638 ("GRID", (0, 0), (-1, -1), 0.5, colors.black)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
639 ]))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
640
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
641 # Add GENOME TRAITS table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
642 elements.append(genome_traits_table)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
643
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
644 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
645 elements.append(Spacer(1, 28))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
646
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
647 # Add EBP METRICS SECTION subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
648 subtitle = Paragraph("EBP metrics summary and curation notes", styles['subTitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
649 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
650
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
651 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
652 elements.append(Spacer(1, 24))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
653
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
654 # Iterate over haplotypes in the Curated category to get data for EBP metrics
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
655 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {})
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
656 haplotype_names = list(curated_assemblies.keys())
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
657
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
658 for haplotype in haplotype_names:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
659 properties = curated_assemblies[haplotype]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
660 if 'gfastats--nstar-report_txt' in properties and 'merqury_qv' in properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
661 gfastats_path = properties['gfastats--nstar-report_txt']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
662 order = haplotype_names.index(haplotype) # Determine the order based on the position of the haplotype in the list
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
663 qv_value = get_qv_value(properties['merqury_qv'], order, 'Curated', haplotype)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
664
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
665 ebp_quality_metric = compute_ebp_metric(haplotype, gfastats_path, qv_value)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
666 EBP_metric_paragraph = Paragraph(ebp_quality_metric, styles["midiStyle"])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
667
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
668 # Add the EBP quality metric paragraph to elements
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
669 elements.append(EBP_metric_paragraph)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
670
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
671 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
672 elements.append(Spacer(1, 8))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
673
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
674 # Add sentence
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
675 Textline = Paragraph("The following metrics were automatically flagged as below EBP recommended standards or different from expected:", styles['midiStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
676 elements.append(Textline)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
677
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
678 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
679 elements.append(Spacer(1, 4))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
680
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
681 # Apply checks and add warning paragraphs to elements
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
682 elements += generate_warning_paragraphs(genome_haploid_length, max_total_bp, "Haploid size (bp)")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
683 elements += generate_warning_paragraphs(haploid_number, obs_haploid_num, "Haploid Number")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
684 elements += generate_warning_paragraphs(proposed_ploidy, ploidy, "Ploidy")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
685 elements += generate_warning_paragraphs(sex, obs_sex, "Sample Sex")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
686
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
687 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
688 elements.append(Spacer(1, 4))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
689
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
690 # Iterate over haplotypes in the Curated category and apply checks
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
691 for haplotype in haplotype_names:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
692 properties = curated_assemblies[haplotype]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
693 if isinstance(properties, dict) and 'merqury_qv' in properties and 'merqury_completeness_stats' in properties and 'busco_short_summary_txt' in properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
694 order = haplotype_names.index(haplotype)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
695 qv_value = get_qv_value(properties['merqury_qv'], order, "Curated", haplotype)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
696 completeness_value = get_completeness_value(properties['merqury_completeness_stats'], order, "Curated", haplotype)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
697 busco_scores = extract_busco_values(properties['busco_short_summary_txt'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
698
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
699 warnings = generate_curated_warnings(haplotype, qv_value, completeness_value, busco_scores)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
700 elements += warnings
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
701
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
702 assembly_warnings = generate_assembly_warnings(asm_data, gaps_per_gbp_data, obs_haploid_num)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
703 elements.extend(assembly_warnings)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
704
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
705 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
706 elements.append(Spacer(1, 24))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
707
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
708 # Add small subtitle for Curator notes
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
709 subtitle = Paragraph("Curator notes", styles['normalStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
710 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
711
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
712 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
713 elements.append(Spacer(1, 8))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
714
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
715 # Curator notes
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
716 curator_notes_paragraph = Paragraph(curator_notes_text, styles["midiStyle"])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
717 elements.append(curator_notes_paragraph)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
718
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
719 # Page break
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
720 elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
721
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
722 # PDF SECTION 2 -------------------------------------------------------------------------------
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
723
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
724 # Add quality metrics section subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
725 subtitle = Paragraph("Quality metrics table", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
726 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
727
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
728 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
729 elements.append(Spacer(1, 48))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
730
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
731 # create QUALITY METRICS table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
732 asm_table = Table(asm_table_data)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
733
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
734 # Style the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
735 asm_table.setStyle(TableStyle([
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
736 ('BACKGROUND', (0, 0), (-1, 0), '#e7e7e7'), # grey background for the header
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
737 ('ALIGN', (0, 0), (-1, -1), 'CENTER'), # center alignment
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
738 ('FONTNAME', (0, 0), (-1, -1), 'Courier'), # regular Courier font for the whole table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
739 ('FONTSIZE', (0, 0), (-1, -1), 11), # font size
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
740 ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
741 ("GRID", (0, 0), (-1, -1), 0.5, colors.black)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
742 ]))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
743
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
744 # Add QUALITY METRICS table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
745 elements.append(asm_table)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
746
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
747 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
748 elements.append(Spacer(1, 5))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
749
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
750 # Store BUSCO version and lineage information from each file in list
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
751 busco_info_list = []
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
752 for asm_stages, stage_properties in asm_data.items():
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
753 for i, haplotype_properties in stage_properties.items():
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
754 if isinstance(haplotype_properties, dict):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
755 if 'busco_short_summary_txt' in haplotype_properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
756 busco_version, lineage_info = extract_busco_info(haplotype_properties['busco_short_summary_txt'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
757 if busco_version and lineage_info:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
758 busco_info_list.append((busco_version, lineage_info))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
759
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
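760 # Check that every collected (version, lineage) entry is identical; NOTE: an empty list also passes all(), and busco_info_list[0] below then raises IndexError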
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
761 if all(info == busco_info_list[0] for info in busco_info_list):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
762 busco_version, (lineage_name, num_genomes, num_buscos) = busco_info_list[0]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
763 elements.append(Paragraph(f"BUSCO {busco_version} Lineage: {lineage_name} (genomes:{num_genomes}, BUSCOs:{num_buscos})", styles['miniStyle']))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
764 else:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
765 elements.append(Paragraph("Warning: BUSCO versions or lineage datasets are not the same across results", styles['miniStyle']))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
766 logging.warning("WARNING!!! BUSCO versions or lineage datasets are not the same across results")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
767
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
768 # Page break
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
769 elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
770
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
771 # PDF SECTION 3 -------------------------------------------------------------------------------
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
772
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
773 # Add hic maps section subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
774 subtitle = Paragraph("HiC contact map of curated assembly", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
775 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
776
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
777 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
778 elements.append(Spacer(1, 36))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
779
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
780 # Initialize counter
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
781 tool_count = 0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
782
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
783 # Add HiC images and links for the haplotypes of the 'Curated' stage
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
784 for asm_stages, stage_properties in asm_data.items():
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
785 if asm_stages == 'Curated':
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
786 tool_elements = list(stage_properties.keys())
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
787
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
788 images_with_names = []
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
789
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
790 for haplotype in tool_elements:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
791 haplotype_properties = stage_properties[haplotype]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
792
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
793 # Check if there is an image and/or a link
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
794 png_file = haplotype_properties.get('hic_FullMap_png', '')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
795 link = haplotype_properties.get('hic_FullMap_link', '')
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
796
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
797 # Prepare paragraphs for the image and link
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
798 if png_file:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
799 # Create image object
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
800 img = Image(png_file, width=11 * cm, height=11 * cm)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
801 images_with_names.append([img])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
802 else:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
803 # Add paragraph for missing image
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
804 missing_png_paragraph = Paragraph(f"<b>{haplotype}</b> HiC PNG is missing!", styles["midiStyle"])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
805 images_with_names.append([missing_png_paragraph])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
806
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
807 # Add paragraph for the link
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
808 if link:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
809 link_html = f'<b>{haplotype}</b> <link href="{link}" color="blue">[LINK]</link>'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
810 else:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
811 link_html = f'<b>{haplotype}</b> File link is missing!'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
812
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
813 link_paragraph = Paragraph(link_html, styles["midiStyle"])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
814 images_with_names.append([link_paragraph])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
815
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
816 # Append a spacer after every haplotype except the last one
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
817 if len(tool_elements) > 1 and tool_elements.index(haplotype) < len(tool_elements) - 1:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
818 images_with_names.append([Spacer(1, 12)])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
819
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
820 # Add the collected images, links and spacers to the elements in chunks of four entries
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
821 for i in range(0, len(images_with_names), 4): # Process up to four entries of images_with_names per table
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
822 elements_to_add = images_with_names[i: i + 4]
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
823
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
824 # Create table for the images and names
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
825 table = Table(elements_to_add)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
826 table.hAlign = 'CENTER'
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
827 elements.append(table)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
828
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
829 # Add a page break conditionally
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
830 next_elements_start = i + 4
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
831 if next_elements_start < len(images_with_names):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
832 if len(images_with_names[next_elements_start]) > 0 and isinstance(images_with_names[next_elements_start][0], Image):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
833 elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
834
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
835 tool_count += 1
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
836
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
837 elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
838
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
839 # PDF SECTION 4 -------------------------------------------------------------------------------
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
840
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
841 # Add kmer spectra section subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
842 subtitle = Paragraph("K-mer spectra of curated assembly", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
843 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
844
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
845 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
846 elements.append(Spacer(1, 48))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
847
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
848 # Initialize counter
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
849 counter = 0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
850
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
851 # Iterate over haplotypes in the Curated category to get K-mer spectra images
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
852 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {})
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
853
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
854 # Get paths for spectra files
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
855 spectra_files = {
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
856 'hap1': {
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
857 'spectra_cn_png': curated_assemblies.get('hap1', {}).get('merqury_hap_spectra_cn_png', None),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
858 },
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
859 'hap2': {
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
860 'spectra_cn_png': curated_assemblies.get('hap2', {}).get('merqury_hap_spectra_cn_png', None),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
861 },
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
862 'common': {
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
863 'spectra_cn_png': curated_assemblies.get('hap1', {}).get('merqury_spectra_cn_png', None),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
864 'spectra_asm_png': curated_assemblies.get('hap1', {}).get('merqury_spectra_asm_png', None),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
865 }
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
866 }
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
867
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
868 # Filter out None values and empty strings
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
869 spectra_files = {k: {sk: v for sk, v in sv.items() if v} for k, sv in spectra_files.items()}
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
870
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
871 # Determine the number of spectra-cn files and assign unique names if needed
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
872 spectra_cn_files = [
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
873 spectra_files['common'].get('spectra_cn_png', None),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
874 spectra_files['hap1'].get('spectra_cn_png', None),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
875 spectra_files['hap2'].get('spectra_cn_png', None)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
876 ]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
877 spectra_cn_files = [f for f in spectra_cn_files if f] # Filter out None values
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
878
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
879 if len(spectra_cn_files) == 3:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
880 # For 3 spectra-cn files
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
881 shortest_spectra_cn_file = min(spectra_cn_files, key=lambda f: len(os.path.basename(f)), default=None)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
882 similar_files = [f for f in spectra_cn_files if f != shortest_spectra_cn_file]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
883 if similar_files:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
884 unique_name1, unique_name2 = find_unique_parts(os.path.basename(similar_files[0]), os.path.basename(similar_files[1]))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
885 else:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
886 shortest_spectra_cn_file = spectra_cn_files[0] if spectra_cn_files else None
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
887 unique_name1 = unique_name2 = None
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
888
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
889 # Create image objects and add filename below each image
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
890 images = []
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
891
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
892 for label, file_dict in spectra_files.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
893 for key, png_file in file_dict.items():
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
894 if png_file:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
895 image = Image(png_file, width=8.4 * cm, height=7 * cm)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
896 filename = os.path.basename(png_file)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
897
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
898 if filename.endswith("spectra-asm.ln.png"):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
899 text = "Distribution of k-mer counts coloured by their presence in reads/assemblies"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
900 elif filename.endswith("spectra-cn.ln.png"):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
901 if len(spectra_cn_files) == 3:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
902 # For 3 spectra-cn files use particular text
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
903 if png_file == shortest_spectra_cn_file:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
904 text = "Distribution of k-mer counts per copy numbers found in asm (dipl.)"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
905 else:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
906 if png_file == spectra_files['hap1'].get('spectra_cn_png', None):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
907 text = f"Distribution of k-mer counts per copy numbers found in <b>{unique_name1}</b> (hapl.)"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
908 elif png_file == spectra_files['hap2'].get('spectra_cn_png', None):
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
909 text = f"Distribution of k-mer counts per copy numbers found in <b>{unique_name2}</b> (hapl.)"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
910 else:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
911 text = "Distribution of k-mer counts per copy numbers found in asm"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
912 else:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
913 # For any other number of spectra-cn files (e.g. 2) use the same text
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
914 text = "Distribution of k-mer counts per copy numbers found in asm"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
915 else:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
916 text = filename
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
917
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
918 images.append([image, Paragraph(text, styles["midiStyle"])])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
919
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
920 # Filter None values
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
921 images = [img for img in images if img[0] is not None]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
922
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
923 # Get number of rows and columns for the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
924 num_rows = (len(images) + 1) // 2 # +1 to handle odd numbers of images
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
925 num_columns = 2
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
926
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
927 # Create the table with dynamic size
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
928 image_table_data = [[images[i * num_columns + j] if i * num_columns + j < len(images) else [] for j in range(num_columns)] for i in range(num_rows)]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
929 image_table = Table(image_table_data)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
930
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
931 # Style the "table"
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
932 table_style = TableStyle([
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
933 ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
934 ('BOTTOMPADDING', (0, 0), (-1, -1), 20), # 20 here is a spacer between rows
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
935 ])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
936
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
937 # Set the style
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
938 image_table.setStyle(table_style)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
939
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
940 # Add image table to elements
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
941 elements.append(image_table)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
942
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
943 # Increase counter by the number of PNGs added
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
944 counter += len(images)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
945
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
946 # If counter is a multiple of 4, insert a page break (the counter is not reset; it is checked again after the loop)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
947 if counter % 4 == 0:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
948 elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
949
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
950 # Add spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
951 elements.append(Spacer(1, 12))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
952
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
953 # If we have processed all haps and the last page does not contain exactly 4 images, insert a page break
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
954 if counter % 4 != 0:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
955 elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
956
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
957 # PDF SECTION 5 -------------------------------------------------------------------------------
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
958
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
959 # Add contamination section subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
960 subtitle = Paragraph("Post-curation contamination screening", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
961 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
962
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
963 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
964 elements.append(Spacer(1, 36))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
965
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
966 # Initialize counter
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
967 tool_count = 0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
968
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
969 # Add title and images for each step
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
970 for asm_stages, stage_properties in asm_data.items():
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
971 if asm_stages == 'Curated': # Check if the current stage is 'Curated'
1
82450f7907ef planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit e293d14e82a903a4cab64dd72dfa3f3798466176
bgruening
parents: 0
diff changeset
972 tool_elements = list(stage_properties.keys())
0
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
973
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
974 for haplotype in tool_elements:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
975 haplotype_properties = stage_properties[haplotype]
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
976 if isinstance(haplotype_properties, dict) and 'blobplot_cont_png' in haplotype_properties:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
977 # Get image path
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
978 png_file = haplotype_properties['blobplot_cont_png']
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
979
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
980 # If png_file is not empty, display it
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
981 if png_file:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
982 # Create image object
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
983 img = Image(png_file, width=20 * cm, height=20 * cm)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
984 elements.append(img)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
985
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
986 # Create the caption paragraph, prefixed with the haplotype name
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
987 blob_text = f"<b>{haplotype}.</b> Bubble plot circles are scaled by sequence length, positioned by coverage and GC proportion, and coloured by taxonomy. Histograms show total assembly length distribution on each axis."
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
988 blob_paragraph = Paragraph(blob_text, styles["midiStyle"])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
989 elements.append(blob_paragraph)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
990 else:
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
991 # Add paragraph for missing image
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
992 missing_png_paragraph = Paragraph(f"<b>{haplotype}</b> PNG is missing!", styles["midiStyle"])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
993 elements.append(missing_png_paragraph)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
994
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
995 # Add a page break after each image and its description
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
996 elements.append(PageBreak())
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
997
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
998 tool_count += 1
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
999
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1000 # SECTION 6 -----------------------------------------------------------------------------------
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1001
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1002 # Add data profile section subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1003 subtitle = Paragraph("Data profile", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1004 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1005
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1006 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1007 elements.append(Spacer(1, 24))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1008
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1009 # Create the DATA PROFILE table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1010 data_table = Table(table_data)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1011
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1012 # Style the table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1013 data_table.setStyle(TableStyle([
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1014 ('BACKGROUND', (0, 0), (0, -1), '#e7e7e7'), # grey background for the first column
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1015 ('ALIGN', (0, 0), (-1, -1), 'CENTER'), # center alignment
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1016 ('FONTNAME', (0, 0), (-1, -1), 'Courier'), # remove bold font
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1017 ('FONTSIZE', (0, 0), (-1, -1), 12), # font size for the header
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1018 ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1019 ("GRID", (0, 0), (-1, -1), 0.5, colors.black)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1020 ]))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1021
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1022 # Add DATA PROFILE table
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1023 elements.append(data_table)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1024
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1025 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1026 elements.append(Spacer(1, 32))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1027
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1028 # Add assembly pipeline section subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1029 subtitle = Paragraph("Assembly pipeline", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1030 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1031
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1032 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1033 elements.append(Spacer(1, 24))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1034
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1035 # Add ASM PIPELINE tree
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1036 elements.append(Paragraph(asm_pipeline_tree, styles['treeStyle']))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1037
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1038 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1039 elements.append(Spacer(1, 32))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1040
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1041 # Add curation pipeline section subtitle
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1042 subtitle = Paragraph("Curation pipeline", styles['TitleStyle'])
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1043 elements.append(subtitle)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1044
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1045 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1046 elements.append(Spacer(1, 24))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1047
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1048 # Add CURATION PIPELINE tree
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1049 elements.append(Paragraph(curation_pipeline_tree, styles['treeStyle']))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1050
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1051 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1052 elements.append(Spacer(1, 48))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1053
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1054 # Add submitter, affiliation
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1055 submitter_paragraph_style = ParagraphStyle(name='SubmitterStyle', fontName='Courier', fontSize=10)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1056 elements.append(Paragraph(f"Submitter: {submitter}", submitter_paragraph_style))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1057 elements.append(Paragraph(f"Affiliation: {affiliation}", submitter_paragraph_style))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1058
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1059 # Spacer
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1060 elements.append(Spacer(1, 8))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1061
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1062 # Add the date and time (CET) of the document creation
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1063 cet = pytz.timezone("CET")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1064 current_datetime = datetime.now(cet)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1065 formatted_datetime = current_datetime.strftime("%Y-%m-%d %H:%M:%S %Z")
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1066 elements.append(Paragraph(f"Date and time: {formatted_datetime}", submitter_paragraph_style))
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1067
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1068 # Build the PDF ###############################################################################
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1069 pdf.build(elements)
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1070
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
1071
6af76d4371f8 planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit a08f73e00550020ac83f4d45045075962a8a2251
bgruening
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: take the path to the YAML description from the
    # command line and hand it to make_report().
    parser = argparse.ArgumentParser(
        description=(
            'Create an ERGA Assembly Report (EAR) from a YAML file. '
            'Visit https://github.com/ERGA-consortium/EARs for more information'
        ),
    )
    parser.add_argument(
        'yaml_file',
        type=str,
        help='Path to the YAML file',
    )
    args = parser.parse_args()
    make_report(args.yaml_file)