Mercurial > repos > bgruening > erga_ear
comparison make_EAR.py @ 2:0efed25f6d38 draft default tip
planemo upload for repository https://github.com/ERGA-consortium/EARs/tree/main commit 720787c4fb8885f5127ab6ada2813f8dd580921c
| field | value |
|---|---|
| author | bgruening |
| date | Tue, 15 Oct 2024 12:52:44 +0000 |
| parents | 82450f7907ef |
| children | |
comparison legend: equal | deleted | inserted | replaced
1:82450f7907ef | 2:0efed25f6d38 |
---|---|
19 | 19 |
20 # make_EAR_glxy.py | 20 # make_EAR_glxy.py |
21 # CAUTION: This is for the Galaxy version! | 21 # CAUTION: This is for the Galaxy version! |
22 # by Diego De Panis | 22 # by Diego De Panis |
23 # ERGA Sequencing and Assembly Committee | 23 # ERGA Sequencing and Assembly Committee |
24 EAR_version = "v24.08.26" | 24 EAR_version = "v24.10.15" |
25 | 25 |
26 | 26 |
27 def make_report(yaml_file): | 27 def make_report(yaml_file): |
28 logging.basicConfig(filename='EAR.log', level=logging.INFO) | 28 logging.basicConfig(filename='EAR.log', level=logging.INFO) |
29 # Read the content from EAR.yaml file | 29 # Read the content from EAR.yaml file |
34 | 34 |
35 def format_number(value): | 35 def format_number(value): |
36 try: | 36 try: |
37 value_float = float(value) | 37 value_float = float(value) |
38 if value_float.is_integer(): | 38 if value_float.is_integer(): |
39 # format as an integer if no decimal part | 39 # format as integer if no decimal |
40 return f'{int(value_float):,}' | 40 return f'{int(value_float):,}' |
41 else: | 41 else: |
42 # format as a float | 42 # format as a float |
43 return f'{value_float:,}' | 43 return f'{value_float:,}' |
44 except ValueError: | 44 except ValueError: |
45 # return the original value if it can't be converted to a float | 45 # return original value if can't be converted to float |
46 return value | 46 return value |
47 | 47 |
48 # extract gfastats values | 48 # extract gfastats values |
49 def extract_gfastats_values(content, keys): | 49 def extract_gfastats_values(content, keys): |
50 return [re.findall(f"{key}: (.+)", content)[0] for key in keys] | 50 values = [] |
51 for key in keys: | |
52 # colon-separated as default format first | |
53 match = re.search(rf"{re.escape(key)}:\s*(.+)", content) | |
54 if not match: | |
55 # If not try galaxy's tab-separated | |
56 match = re.search(rf"{re.escape(key)}\t(.+)", content) | |
57 if match: | |
58 values.append(match.group(1).strip()) | |
59 else: | |
60 values.append("N/A") | |
61 return values | |
51 | 62 |
52 keys = [ | 63 keys = [ |
53 "Total scaffold length", | 64 "Total scaffold length", |
54 "GC content %", | 65 "GC content %", |
55 "# gaps in scaffolds", | 66 "# gaps in scaffolds", |
77 | 88 |
78 # extract Total bp from gfastats report | 89 # extract Total bp from gfastats report |
79 def extract_total_bp_from_gfastats(gfastats_path): | 90 def extract_total_bp_from_gfastats(gfastats_path): |
80 with open(gfastats_path, "r") as f: | 91 with open(gfastats_path, "r") as f: |
81 content = f.read() | 92 content = f.read() |
82 total_bp = re.search(r"Total scaffold length: (.+)", content).group(1) | 93 # Try colon-separated format first |
83 total_bp = int(total_bp.replace(',', '')) | 94 match = re.search(r"Total scaffold length:\s*(.+)", content) |
84 return "{:,}".format(total_bp) | 95 if not match: |
96 # If not found, try tab-separated format | |
97 match = re.search(r"Total scaffold length\t(.+)", content) | |
98 if match: | |
99 total_bp = match.group(1).replace(',', '') | |
100 return "{:,}".format(int(total_bp)) | |
101 else: | |
102 logging.error(f"Could not find Total scaffold length in {gfastats_path}") | |
103 return "N/A" | |
85 | 104 |
86 # compute EBP quality metric | 105 # compute EBP quality metric |
87 def compute_ebp_metric(haplotype, gfastats_path, qv_value): | 106 def compute_ebp_metric(haplotype, gfastats_path, qv_value): |
88 keys_needed = ["Contig N50", "Scaffold N50"] | 107 keys_needed = ["Contig N50", "Scaffold N50"] |
89 content = '' | 108 content = '' |
91 content = f.read() | 110 content = f.read() |
92 | 111 |
93 values = extract_gfastats_values(content, keys_needed) | 112 values = extract_gfastats_values(content, keys_needed) |
94 contig_n50_log = math.floor(math.log10(int(values[0].replace(',', '')))) | 113 contig_n50_log = math.floor(math.log10(int(values[0].replace(',', '')))) |
95 scaffold_n50_log = math.floor(math.log10(int(values[1].replace(',', '')))) | 114 scaffold_n50_log = math.floor(math.log10(int(values[1].replace(',', '')))) |
96 | |
97 return f"Obtained EBP quality metric for {haplotype}: {contig_n50_log}.{scaffold_n50_log}.Q{math.floor(float(qv_value))}" | 115 return f"Obtained EBP quality metric for {haplotype}: {contig_n50_log}.{scaffold_n50_log}.Q{math.floor(float(qv_value))}" |
98 | 116 |
99 # extract qv values | 117 # extract qv values |
100 def get_qv_value(file_path, order, tool, haplotype): | 118 def get_qv_value(file_path, order, tool, haplotype): |
101 try: | 119 try: |
149 | 167 |
150 # extract BUSCO info | 168 # extract BUSCO info |
151 def extract_busco_info(file_path): | 169 def extract_busco_info(file_path): |
152 busco_version = None | 170 busco_version = None |
153 lineage_info = None | 171 lineage_info = None |
172 busco_mode = None | |
173 busco_pred = None | |
154 | 174 |
155 try: | 175 try: |
156 with open(file_path, 'r') as file: | 176 with open(file_path, 'r') as file: |
157 content = file.read() | 177 content = file.read() |
158 version_match = re.search(r"# BUSCO version is: ([\d.]+)", content) | 178 version_match = re.search(r"# BUSCO version is: ([\d.]+)", content) |
159 if version_match: | 179 if version_match: |
160 busco_version = version_match.group(1) | 180 busco_version = version_match.group(1) |
161 lineage_match = re.search(r"The lineage dataset is: (.*?) \(Creation date:.*?, number of genomes: (\d+), number of BUSCOs: (\d+)\)", content) | 181 lineage_match = re.search(r"The lineage dataset is: (.*?) \(Creation date:.*?, number of (genomes|species): (\d+), number of BUSCOs: (\d+)\)", content) |
162 if lineage_match: | 182 if lineage_match: |
163 lineage_info = lineage_match.groups() | 183 lineage_info = (lineage_match.group(1), lineage_match.group(3), lineage_match.group(4)) |
164 if not lineage_info: | 184 mode_match = re.search(r"# BUSCO was run in mode: (\w+)", content) |
165 lineage_match = re.search(r"The lineage dataset is: (.*?) \(Creation date:.*?, number of species: (\d+), number of BUSCOs: (\d+)\)", content) | 185 if mode_match: |
166 if lineage_match: | 186 busco_mode = mode_match.group(1) |
167 lineage_info = lineage_match.groups() | 187 pred_match = re.search(r"# Gene predictor used: (\w+)", content) |
188 if pred_match: | |
189 busco_pred = pred_match.group(1) | |
168 | 190 |
169 except Exception as e: | 191 except Exception as e: |
170 logging.warning(f"Error reading {file_path}: {str(e)}") | 192 logging.warning(f"Error reading {file_path}: {str(e)}") |
171 | 193 |
172 return busco_version, lineage_info | 194 return busco_version, lineage_info, busco_mode, busco_pred |
173 | 195 |
174 # Function to check and generate warning messages | 196 # Function to check and generate warning messages |
175 def generate_warning_paragraphs(expected, observed, trait): | 197 def generate_warning_paragraphs(expected, observed, trait): |
176 paragraphs = [] | 198 paragraphs = [] |
177 try: | 199 try: |
193 if expected.strip().lower() != observed.strip().lower(): | 215 if expected.strip().lower() != observed.strip().lower(): |
194 message = ". Observed sex is different from Sample sex" | 216 message = ". Observed sex is different from Sample sex" |
195 paragraphs.append(Paragraph(message, styles["midiStyle"])) | 217 paragraphs.append(Paragraph(message, styles["midiStyle"])) |
196 except Exception as e: | 218 except Exception as e: |
197 logging.warning(f"Error in generating warning for {trait}: {str(e)}") | 219 logging.warning(f"Error in generating warning for {trait}: {str(e)}") |
220 | |
198 return paragraphs | 221 return paragraphs |
199 | 222 |
200 # Generate warnings for curated haplotypes (qv, kcomp, busco) | 223 # Generate warnings for curated haplotypes (qv, kcomp, busco) |
201 def generate_curated_warnings(haplotype, qv_value, completeness_value, busco_scores): | 224 def generate_curated_warnings(haplotype, qv_value, completeness_value, busco_scores): |
202 paragraphs = [] | 225 paragraphs = [] |
463 for asm_stage, stage_properties in asm_data.items(): | 486 for asm_stage, stage_properties in asm_data.items(): |
464 for haplotypes, haplotype_properties in stage_properties.items(): | 487 for haplotypes, haplotype_properties in stage_properties.items(): |
465 if isinstance(haplotype_properties, dict): | 488 if isinstance(haplotype_properties, dict): |
466 if 'gfastats--nstar-report_txt' in haplotype_properties: | 489 if 'gfastats--nstar-report_txt' in haplotype_properties: |
467 file_path = haplotype_properties['gfastats--nstar-report_txt'] | 490 file_path = haplotype_properties['gfastats--nstar-report_txt'] |
468 with open(file_path, 'r') as file: | 491 try: |
469 content = file.read() | 492 with open(file_path, 'r') as file: |
470 gfastats_data[(asm_stage, haplotypes)] = extract_gfastats_values(content, keys) | 493 content = file.read() |
494 gfastats_data[(asm_stage, haplotypes)] = extract_gfastats_values(content, keys) | |
495 except FileNotFoundError: | |
496 logging.error(f"Gfastats file not found: {file_path}") | |
497 except Exception as e: | |
498 logging.error(f"Error processing gfastats file {file_path}: {str(e)}") | |
471 | 499 |
472 gaps_per_gbp_data = {} | 500 gaps_per_gbp_data = {} |
473 for (asm_stage, haplotypes), values in gfastats_data.items(): | 501 for (asm_stage, haplotypes), values in gfastats_data.items(): |
474 try: | 502 try: |
475 gaps = float(values[gaps_index]) | 503 gaps = float(values[gaps_index].replace(',', '')) |
476 total_length = float(values[total_length_index]) | 504 total_length = float(values[total_length_index].replace(',', '')) |
477 gaps_per_gbp = round((gaps / total_length * 1_000_000_000), 2) | 505 if total_length > 0: |
478 gaps_per_gbp_data[(asm_stage, haplotypes)] = gaps_per_gbp | 506 gaps_per_gbp = round((gaps / total_length * 1_000_000_000), 2) |
479 except (ValueError, ZeroDivisionError): | 507 gaps_per_gbp_data[(asm_stage, haplotypes)] = gaps_per_gbp |
480 gaps_per_gbp_data[(asm_stage, haplotypes)] = '' | 508 else: |
509 logging.warning(f"Total length is zero for {asm_stage} {haplotypes}") | |
510 gaps_per_gbp_data[(asm_stage, haplotypes)] = 'N/A' | |
511 except (ValueError, IndexError): | |
512 logging.warning(f"Could not calculate gaps per Gbp for {asm_stage} {haplotypes}") | |
513 gaps_per_gbp_data[(asm_stage, haplotypes)] = 'N/A' | |
481 | 514 |
482 # Define the contigging table (column names) | 515 # Define the contigging table (column names) |
483 asm_table_data = [["Metrics"] + [f'{asm_stage} \n {haplotypes}' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]] | 516 asm_table_data = [["Metrics"] + [f'{asm_stage} \n {haplotypes}' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]] |
484 | 517 |
485 # Fill the table with the gfastats data | 518 # Fill the table with the gfastats data |
486 for i in range(len(display_names)): | 519 for i in range(len(display_names)): |
487 metric = display_names[i] | 520 metric = display_names[i] |
488 if metric not in exclusion_list: | 521 if metric not in exclusion_list: |
489 asm_table_data.append([metric] + [format_number(gfastats_data.get((asm_stage, haplotypes), [''])[i]) if (asm_stage, haplotypes) in gfastats_data else '' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]) | 522 asm_table_data.append([metric] + [format_number(gfastats_data.get((asm_stage, haplotypes), ['N/A'] * len(keys))[i]) if (asm_stage, haplotypes) in gfastats_data else 'N/A' for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]) |
490 | 523 |
491 # Add the gaps/gbp in between | 524 # Add the gaps/gbp in between |
492 asm_table_data.insert(gaps_index + 1, ['Gaps/Gbp'] + [format_number(gaps_per_gbp_data.get((asm_stage, haplotypes), '')) for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]) | 525 asm_table_data.insert(gaps_index + 1, ['Gaps/Gbp'] + [format_number(gaps_per_gbp_data.get((asm_stage, haplotypes), 'N/A')) for asm_stage in asm_data for haplotypes in asm_stages if haplotypes in asm_data[asm_stage]]) |
493 | |
494 # get QV, Kmer completeness and BUSCO data | 526 # get QV, Kmer completeness and BUSCO data |
495 qv_data = {} | 527 qv_data = {} |
496 completeness_data = {} | 528 completeness_data = {} |
497 busco_data = {metric: {} for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']} | 529 busco_data = {metric: {} for metric in ['BUSCO sing.', 'BUSCO dupl.', 'BUSCO frag.', 'BUSCO miss.']} |
498 for asm_stage, stage_properties in asm_data.items(): | 530 for asm_stage, stage_properties in asm_data.items(): |
745 elements.append(asm_table) | 777 elements.append(asm_table) |
746 | 778 |
747 # Spacer | 779 # Spacer |
748 elements.append(Spacer(1, 5)) | 780 elements.append(Spacer(1, 5)) |
749 | 781 |
750 # Store BUSCO version and lineage information from each file in list | 782 # Store BUSCO information from each file in a list |
751 busco_info_list = [] | 783 busco_info_list = [] |
752 for asm_stages, stage_properties in asm_data.items(): | 784 for asm_stages, stage_properties in asm_data.items(): |
753 for i, haplotype_properties in stage_properties.items(): | 785 for i, haplotype_properties in stage_properties.items(): |
754 if isinstance(haplotype_properties, dict): | 786 if isinstance(haplotype_properties, dict): |
755 if 'busco_short_summary_txt' in haplotype_properties: | 787 if 'busco_short_summary_txt' in haplotype_properties: |
756 busco_version, lineage_info = extract_busco_info(haplotype_properties['busco_short_summary_txt']) | 788 busco_info = extract_busco_info(haplotype_properties['busco_short_summary_txt']) |
757 if busco_version and lineage_info: | 789 if all(busco_info): |
758 busco_info_list.append((busco_version, lineage_info)) | 790 busco_info_list.append(busco_info) |
791 | |
792 # Function to format BUSCO information | |
793 def format_busco_info(info): | |
794 version, (lineage, genomes, buscos), mode, predictor = info | |
795 return f"BUSCO: {version} ({mode}, {predictor}) / Lineage: {lineage} (genomes:{genomes}, BUSCOs:{buscos})" | |
759 | 796 |
760 # Checking if all elements in the list are identical | 797 # Checking if all elements in the list are identical |
761 if all(info == busco_info_list[0] for info in busco_info_list): | 798 if busco_info_list and all(info == busco_info_list[0] for info in busco_info_list): |
762 busco_version, (lineage_name, num_genomes, num_buscos) = busco_info_list[0] | 799 busco_text = format_busco_info(busco_info_list[0]) |
763 elements.append(Paragraph(f"BUSCO {busco_version} Lineage: {lineage_name} (genomes:{num_genomes}, BUSCOs:{num_buscos})", styles['miniStyle'])) | 800 elements.append(Paragraph(busco_text, styles['miniStyle'])) |
764 else: | 801 else: |
765 elements.append(Paragraph("Warning: BUSCO versions or lineage datasets are not the same across results", styles['miniStyle'])) | 802 elements.append(Paragraph("Warning! BUSCO versions or lineage datasets are not the same across results:", styles['miniStyle'])) |
766 logging.warning("WARNING!!! BUSCO versions or lineage datasets are not the same across results") | 803 logging.warning("WARNING: BUSCO versions or lineage datasets are not the same across results") |
804 for info in busco_info_list: | |
805 busco_text = format_busco_info(info) | |
806 elements.append(Paragraph(busco_text, styles['miniStyle'])) | |
767 | 807 |
768 # Page break | 808 # Page break |
769 elements.append(PageBreak()) | 809 elements.append(PageBreak()) |
770 | 810 |
771 # PDF SECTION 3 ------------------------------------------------------------------------------- | 811 # PDF SECTION 3 ------------------------------------------------------------------------------- |
804 missing_png_paragraph = Paragraph(f"<b>{haplotype}</b> HiC PNG is missing!", styles["midiStyle"]) | 844 missing_png_paragraph = Paragraph(f"<b>{haplotype}</b> HiC PNG is missing!", styles["midiStyle"]) |
805 images_with_names.append([missing_png_paragraph]) | 845 images_with_names.append([missing_png_paragraph]) |
806 | 846 |
807 # Add paragraph for the link | 847 # Add paragraph for the link |
808 if link: | 848 if link: |
809 link_html = f'<b>{haplotype}</b> <link href="{link}" color="blue">[LINK]</link>' | 849 link_html = f'<b>{haplotype}</b> <link href="{link}" color="blue">[non-permanent LINK]</link>' |
810 else: | 850 else: |
811 link_html = f'<b>{haplotype}</b> File link is missing!' | 851 link_html = f'<b>{haplotype}</b> File link is missing!' |
812 | 852 |
813 link_paragraph = Paragraph(link_html, styles["midiStyle"]) | 853 link_paragraph = Paragraph(link_html, styles["midiStyle"]) |
814 images_with_names.append([link_paragraph]) | 854 images_with_names.append([link_paragraph]) |
850 | 890 |
851 # Iterate over haplotypes in the Curated category to get K-mer spectra images | 891 # Iterate over haplotypes in the Curated category to get K-mer spectra images |
852 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {}) | 892 curated_assemblies = yaml_data.get('ASSEMBLIES', {}).get('Curated', {}) |
853 | 893 |
854 # Get paths for spectra files | 894 # Get paths for spectra files |
855 spectra_files = { | 895 spectra_files = {'common': {}} |
856 'hap1': { | 896 for assembly_type, assembly_data in curated_assemblies.items(): |
857 'spectra_cn_png': curated_assemblies.get('hap1', {}).get('merqury_hap_spectra_cn_png', None), | 897 if 'merqury_hap_spectra_cn_png' in assembly_data: |
858 }, | 898 spectra_files[assembly_type] = {'spectra_cn_png': assembly_data['merqury_hap_spectra_cn_png']} |
859 'hap2': { | 899 if 'merqury_spectra_cn_png' in assembly_data: |
860 'spectra_cn_png': curated_assemblies.get('hap2', {}).get('merqury_hap_spectra_cn_png', None), | 900 spectra_files['common']['spectra_cn_png'] = assembly_data['merqury_spectra_cn_png'] |
861 }, | 901 if 'merqury_spectra_asm_png' in assembly_data: |
862 'common': { | 902 spectra_files['common']['spectra_asm_png'] = assembly_data['merqury_spectra_asm_png'] |
863 'spectra_cn_png': curated_assemblies.get('hap1', {}).get('merqury_spectra_cn_png', None), | |
864 'spectra_asm_png': curated_assemblies.get('hap1', {}).get('merqury_spectra_asm_png', None), | |
865 } | |
866 } | |
867 | |
868 # Filter out None values and empty strings | |
869 spectra_files = {k: {sk: v for sk, v in sv.items() if v} for k, sv in spectra_files.items()} | |
870 | 903 |
871 # Determine the number of spectra-cn files and assign unique names if needed | 904 # Determine the number of spectra-cn files and assign unique names if needed |
872 spectra_cn_files = [ | 905 spectra_cn_files = [ |
873 spectra_files['common'].get('spectra_cn_png', None), | 906 file_dict.get('spectra_cn_png', None) |
874 spectra_files['hap1'].get('spectra_cn_png', None), | 907 for file_dict in spectra_files.values() |
875 spectra_files['hap2'].get('spectra_cn_png', None) | 908 if file_dict.get('spectra_cn_png') |
876 ] | 909 ] |
877 spectra_cn_files = [f for f in spectra_cn_files if f] # Filter out None values | 910 spectra_cn_files = list(set(spectra_cn_files)) # Remove duplicates |
878 | 911 |
879 if len(spectra_cn_files) == 3: | 912 if len(spectra_cn_files) == 3: |
880 # For 3 spectra-cn files | |
881 shortest_spectra_cn_file = min(spectra_cn_files, key=lambda f: len(os.path.basename(f)), default=None) | 913 shortest_spectra_cn_file = min(spectra_cn_files, key=lambda f: len(os.path.basename(f)), default=None) |
882 similar_files = [f for f in spectra_cn_files if f != shortest_spectra_cn_file] | 914 similar_files = [f for f in spectra_cn_files if f != shortest_spectra_cn_file] |
883 if similar_files: | 915 if similar_files: |
884 unique_name1, unique_name2 = find_unique_parts(os.path.basename(similar_files[0]), os.path.basename(similar_files[1])) | 916 unique_name1, unique_name2 = find_unique_parts(os.path.basename(similar_files[0]), os.path.basename(similar_files[1])) |
885 else: | 917 else: |
886 shortest_spectra_cn_file = spectra_cn_files[0] if spectra_cn_files else None | 918 shortest_spectra_cn_file = spectra_cn_files[0] if spectra_cn_files else None |
887 unique_name1 = unique_name2 = None | 919 # unique_name1 = unique_name2 = None |
888 | 920 |
889 # Create image objects and add filename below each image | 921 # Create image objects and add filename below each image |
890 images = [] | 922 images = [] |
891 | |
892 for label, file_dict in spectra_files.items(): | 923 for label, file_dict in spectra_files.items(): |
893 for key, png_file in file_dict.items(): | 924 for key, png_file in file_dict.items(): |
894 if png_file: | 925 if png_file and os.path.exists(png_file): |
895 image = Image(png_file, width=8.4 * cm, height=7 * cm) | 926 try: |
896 filename = os.path.basename(png_file) | 927 image = Image(png_file, width=8.4 * cm, height=7 * cm) |
897 | 928 filename = os.path.basename(png_file) |
898 if filename.endswith("spectra-asm.ln.png"): | 929 |
899 text = "Distribution of k-mer counts coloured by their presence in reads/assemblies" | 930 if filename.endswith("spectra-asm.ln.png"): |
900 elif filename.endswith("spectra-cn.ln.png"): | 931 text = "Distribution of k-mer counts coloured by their presence in reads/assemblies" |
901 if len(spectra_cn_files) == 3: | 932 elif filename.endswith("spectra-cn.ln.png"): |
902 # For 3 spectra-cn files use particular text | 933 if len(spectra_cn_files) == 3: |
903 if png_file == shortest_spectra_cn_file: | 934 if png_file == shortest_spectra_cn_file: |
904 text = "Distribution of k-mer counts per copy numbers found in asm (dipl.)" | 935 text = "Distribution of k-mer counts per copy numbers found in asm (dipl.)" |
936 else: | |
937 text = f"Distribution of k-mer counts per copy numbers found in {label} (hapl.)" | |
905 else: | 938 else: |
906 if png_file == spectra_files['hap1'].get('spectra_cn_png', None): | 939 text = "Distribution of k-mer counts per copy numbers found in asm" |
907 text = f"Distribution of k-mer counts per copy numbers found in <b>{unique_name1}</b> (hapl.)" | |
908 elif png_file == spectra_files['hap2'].get('spectra_cn_png', None): | |
909 text = f"Distribution of k-mer counts per copy numbers found in <b>{unique_name2}</b> (hapl.)" | |
910 else: | |
911 text = "Distribution of k-mer counts per copy numbers found in asm" | |
912 else: | 940 else: |
913 # For 2 spectra-cn files use same text | 941 text = filename |
914 text = "Distribution of k-mer counts per copy numbers found in asm" | 942 |
915 else: | 943 images.append([image, Paragraph(text, styles["midiStyle"])]) |
916 text = filename | 944 except Exception as e: |
917 | 945 logging.error(f"Error processing image {png_file}: {str(e)}") |
918 images.append([image, Paragraph(text, styles["midiStyle"])]) | |
919 | |
920 # Filter None values | |
921 images = [img for img in images if img[0] is not None] | |
922 | |
923 # Get number of rows and columns for the table | |
924 num_rows = (len(images) + 1) // 2 # +1 to handle odd numbers of images | |
925 num_columns = 2 | |
926 | 946 |
927 # Create the table with dynamic size | 947 # Create the table with dynamic size |
928 image_table_data = [[images[i * num_columns + j] if i * num_columns + j < len(images) else [] for j in range(num_columns)] for i in range(num_rows)] | 948 if images: |
929 image_table = Table(image_table_data) | 949 num_rows = (len(images) + 1) // 2 |
930 | 950 num_columns = 2 |
931 # Style the "table" | 951 image_table_data = [[images[i * num_columns + j] if i * num_columns + j < len(images) else [] for j in range(num_columns)] for i in range(num_rows)] |
932 table_style = TableStyle([ | 952 image_table = Table(image_table_data) |
933 ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'), | 953 |
934 ('BOTTOMPADDING', (0, 0), (-1, -1), 20), # 20 here is a spacer between rows | 954 # Style the table |
935 ]) | 955 table_style = TableStyle([ |
936 | 956 ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'), |
937 # Set the style | 957 ('BOTTOMPADDING', (0, 0), (-1, -1), 20), # 20 here is a spacer between rows |
938 image_table.setStyle(table_style) | 958 ]) |
939 | 959 |
940 # Add image table to elements | 960 image_table.setStyle(table_style) |
941 elements.append(image_table) | 961 elements.append(image_table) |
962 else: | |
963 elements.append(Paragraph("No K-mer spectra images available.", styles["midiStyle"])) | |
942 | 964 |
943 # Increase counter by the number of PNGs added | 965 # Increase counter by the number of PNGs added |
944 counter += len(images) | 966 counter += len(images) |
945 | 967 |
946 # If counter is a multiple of 4, insert a page break and reset counter | 968 # If counter is a multiple of 4, insert a page break and reset counter |