Mercurial > repos > public-health-bioinformatics > adjust_bracken_for_unclassified_reads
changeset 8:3e7de8046d72 draft
planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 888d26702a84c2f8fd1428aff8cd869e94cc0bae-dirty
author | public-health-bioinformatics |
---|---|
date | Thu, 27 Oct 2022 00:13:18 +0000 |
parents | 4bd3a972c4c4 |
children | cf66e6ea92ae |
files | adjust_bracken_for_unclassified_reads.py adjust_bracken_for_unclassified_reads.xml test-data/output/SRR17907745_bracken_abundances_adjusted.tsv |
diffstat | 3 files changed, 17 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/adjust_bracken_for_unclassified_reads.py Thu Mar 10 21:18:57 2022 +0000 +++ b/adjust_bracken_for_unclassified_reads.py Thu Oct 27 00:13:18 2022 +0000 @@ -47,19 +47,7 @@ kraken_report_classified_seqs = list(filter(lambda x: x['taxon_name'] == 'root', kraken_report))[0]['seqs_total'] total_seqs = kraken_report_classified_seqs + kraken_report_unclassified_seqs - percent_unclassified = float(kraken_report_unclassified_seqs) / float(total_seqs) - - bracken_unclassified_entry = { - 'name': 'unclassified', - 'taxonomy_id': 0, - 'taxonomy_lvl': 'U', - 'kraken_assigned_seqs': kraken_report_unclassified_seqs, - 'bracken_assigned_seqs': kraken_report_unclassified_seqs, - 'kraken_fraction_total_seqs': percent_unclassified, - 'bracken_fraction_total_seqs': 0.0, - } - - bracken_abundances = [bracken_unclassified_entry] + bracken_abundances + fraction_unclassified = float(kraken_report_unclassified_seqs) / float(total_seqs) output_fieldnames = [ 'name', @@ -82,7 +70,20 @@ bracken_adjusted_fraction_total_seqs = float(b['bracken_assigned_seqs']) / float(total_seqs) b['bracken_fraction_total_seqs'] = '{:.6f}'.format(bracken_adjusted_fraction_total_seqs) - for b in sorted(bracken_abundances, key=lambda x: x['bracken_fraction_total_seqs'], reverse=True): + bracken_unclassified_entry = { + 'name': 'unclassified', + 'taxonomy_id': 0, + 'taxonomy_lvl': 'U', + 'kraken_assigned_seqs': kraken_report_unclassified_seqs, + 'bracken_assigned_seqs': kraken_report_unclassified_seqs, + 'total_seqs': total_seqs, + 'kraken_fraction_total_seqs': '{:.6f}'.format(fraction_unclassified), + 'bracken_fraction_total_seqs': '{:.6f}'.format(fraction_unclassified), + } + + bracken_abundances = sorted(bracken_abundances, key=lambda x: x['bracken_fraction_total_seqs'], reverse=True) + bracken_abundances = [bracken_unclassified_entry] + bracken_abundances + for b in bracken_abundances: writer.writerow(b)
--- a/adjust_bracken_for_unclassified_reads.xml Thu Mar 10 21:18:57 2022 +0000 +++ b/adjust_bracken_for_unclassified_reads.xml Thu Oct 27 00:13:18 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="adjust_bracken_for_unclassified_reads" name="Adjust Bracken Report for Unclassified Reads" version="0.1.0"> +<tool id="adjust_bracken_for_unclassified_reads" name="Adjust Bracken Report for Unclassified Reads" version="0.2.0+galaxy0"> <description>Adjust bracken report to account for unclassified reads</description> <requirements> </requirements>
--- a/test-data/output/SRR17907745_bracken_abundances_adjusted.tsv Thu Mar 10 21:18:57 2022 +0000 +++ b/test-data/output/SRR17907745_bracken_abundances_adjusted.tsv Thu Oct 27 00:13:18 2022 +0000 @@ -1,9 +1,9 @@ name taxonomy_id taxonomy_lvl kraken_assigned_seqs bracken_assigned_seqs total_seqs kraken_fraction_total_seqs bracken_fraction_total_seqs +unclassified 0 U 110613 110613 2570868 0.043026 0.043026 Klebsiella quasipneumoniae 1463165 S 484958 1017029 2570868 0.188636 0.395598 Escherichia coli 562 S 181539 751229 2570868 0.070614 0.292208 Klebsiella pneumoniae 573 S 94362 315713 2570868 0.036704 0.122804 Citrobacter freundii 546 S 116592 132960 2570868 0.045351 0.051718 -unclassified 0 U 110613 110613 2570868 0.043026 0.043026 Enterobacter hormaechei 158836 S 74706 85010 2570868 0.029059 0.033067 Enterobacter cloacae 550 S 73130 79124 2570868 0.028446 0.030777 Klebsiella variicola 244366 S 4879 10752 2570868 0.001898 0.004182