Mercurial > repos > dfornika > screen_abricate_report
changeset 1:40003338a8e8 draft
planemo upload for repository https://github.com/dfornika/galaxytools/blob/master/tools/screen_abricate_report commit d9732cd3279d03dcc498bf2eb903f9e6120a9d85-dirty
author | dfornika |
---|---|
date | Fri, 27 Sep 2019 13:31:43 -0400 |
parents | 2ffd23634c1e |
children | 378696e5f81c |
files | screen_abricate_report.py screen_abricate_report.xml test-data/gene_detection_status.tsv test-data/screen.tsv test-data/screened_report.tsv test-data/screening_file.tsv |
diffstat | 6 files changed, 44 insertions(+), 18 deletions(-) [+] |
line wrap: on
line diff
--- a/screen_abricate_report.py Thu Sep 26 19:37:27 2019 -0400 +++ b/screen_abricate_report.py Fri Sep 27 13:31:43 2019 -0400 @@ -17,30 +17,42 @@ screen.append(row) return screen -def get_abricate_report_fieldnames(abricate_report): - with open(abricate_report) as f: +def get_fieldnames(input_file): + with open(input_file) as f: reader = csv.DictReader(f, delimiter="\t", quotechar='"') row = next(reader) fieldnames = row.keys() return fieldnames def main(args): - screen = parse_screen_file(args.screen) - abricate_report_fieldnames = get_abricate_report_fieldnames(args.abricate_report) - with open(args.abricate_report) as f: - reader = csv.DictReader(f, delimiter="\t", quotechar='"') - writer = csv.DictWriter(sys.stdout, delimiter="\t", quotechar='"', fieldnames=abricate_report_fieldnames) - writer.writeheader() - for row in reader: - for gene in screen: - if re.search(gene['regex'], row['GENE']): - writer.writerow(row) - + screen = parse_screen_file(args.screening_file) + abricate_report_fieldnames = get_fieldnames(args.abricate_report) + gene_detection_status_fieldnames = ['gene_name', 'detected'] + with open(args.abricate_report, 'r') as f1, open(args.screened_report, 'w') as f2, open(args.gene_detection_status, 'w') as f3: + abricate_report_reader = csv.DictReader(f1, delimiter="\t", quotechar='"') + screened_report_writer = csv.DictWriter(f2, delimiter="\t", quotechar='"', fieldnames=abricate_report_fieldnames) + gene_detection_status_writer = csv.DictWriter(f3, delimiter="\t", quotechar='"', fieldnames=gene_detection_status_fieldnames) + screened_report_writer.writeheader() + gene_detection_status_writer.writeheader() + + for gene in screen: + gene_detection_status = { + 'gene_name': gene['gene_name'], + 'detected': False + } + for abricate_report_row in abricate_report_reader: + if re.search(gene['regex'], abricate_report_row['GENE']): + gene_detection_status['detected'] = True + screened_report_writer.writerow(abricate_report_row) + 
gene_detection_status_writer.writerow(gene_detection_status) + f1.seek(0) # return file pointer to start of abricate report if __name__ == '__main__': parser = argparse.ArgumentParser() - parser.add_argument("--screen", help="TSV file defining genes to screen for") - parser.add_argument("abricate_report", help="Abricate output") + parser.add_argument("abricate_report", help="Input: Abricate report to screen (tsv)") + parser.add_argument("--screening_file", help="Input: List of genes to screen for (tsv)") + parser.add_argument("--screened_report", help="Output: Screened abricate report including only genes of interest (tsv)") + parser.add_argument("--gene_detection_status", help="Output: detection status for all genes listed in the screening file (tsv)") args = parser.parse_args() main(args)
--- a/screen_abricate_report.xml Thu Sep 26 19:37:27 2019 -0400
+++ b/screen_abricate_report.xml Fri Sep 27 13:31:43 2019 -0400
@@ -6,7 +6,8 @@
 '${__tool_directory__}/screen_abricate_report.py'
 '${abricate_report}'
 --screen '${screening_file.fields.path}'
- > '${output}'
+ --screened_report '${screened_report}'
+ --gene_detection_status '${gene_detection_status}'
 ]]></command>
 <inputs>
 <param name="abricate_report" type="data" format="tabular" />
@@ -17,7 +18,8 @@
 </param>
 </inputs>
 <outputs>
- <data name="output" type="data" format="tabular" label="Screened Abricate Report" />
+ <data name="screened_report" type="data" format="tabular" label="Screened Abricate Report" />
+ <data name="gene_detection_status" type="data" format="tabular" label="Gene Detection Status" />
 </outputs>
 <tests>
 <test>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_detection_status.tsv Fri Sep 27 13:31:43 2019 -0400
@@ -0,0 +1,4 @@
+gene_name detected
+KPC False
+NDM True
+OXA True
--- a/test-data/screen.tsv Thu Sep 26 19:37:27 2019 -0400
+++ b/test-data/screen.tsv Fri Sep 27 13:31:43 2019 -0400
@@ -1,4 +1,4 @@
-gene regex
+gene_name regex
 KPC KPC
 NDM NDM
 OXA OXA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/screened_report.tsv Fri Sep 27 13:31:43 2019 -0400
@@ -0,0 +1,4 @@
+#FILE %IDENTITY END SEQUENCE COVERAGE_MAP %COVERAGE DATABASE ACCESSION START PRODUCT GAPS COVERAGE GENE
+SRR9113479_assembly 100.00 990 32 =============== 100.00 card JX262694:1-814 178 card~~~NDM-7~~~JX262694:1-814 NDM-7 is a beta-lactamase found in Escherichia coli 0/0 1-813/813 NDM-7
+SRR9113479_assembly 99.08 18189 25 ...........==== 23.89 card KT736121.1:1-2722 17540 card~~~OXA-368~~~KT736121.1:1-2722 OXA-368 is a beta-lactamase found in Aeromonas sobria. From the Lahey list of OXA beta-lactamases. 0/0 2072-2721/2721 OXA-368
+SRR9113479_assembly 100.00 19084 25 =============== 100.00 card JN420336.1:1400-2231 18254 card~~~OXA-1~~~JN420336.1:1400-2231 OXA-1 is a beta-lactamase found in E. coli 0/0 1-831/831 OXA-1