# HG changeset patch
# User dfornika
# Date 1569605503 14400
# Node ID 40003338a8e8b470c358d973f250dc57b5dda000
# Parent 2ffd23634c1e998b852894dfbfe905f7d3981886
planemo upload for repository https://github.com/dfornika/galaxytools/blob/master/tools/screen_abricate_report commit d9732cd3279d03dcc498bf2eb903f9e6120a9d85-dirty
diff -r 2ffd23634c1e -r 40003338a8e8 screen_abricate_report.py
--- a/screen_abricate_report.py Thu Sep 26 19:37:27 2019 -0400
+++ b/screen_abricate_report.py Fri Sep 27 13:31:43 2019 -0400
@@ -17,30 +17,46 @@
screen.append(row)
return screen
-def get_abricate_report_fieldnames(abricate_report):
- with open(abricate_report) as f:
+def get_fieldnames(input_file):
+ """Return the column names of the tab-separated file input_file."""
+ with open(input_file) as f:
reader = csv.DictReader(f, delimiter="\t", quotechar='"')
- row = next(reader)
- fieldnames = row.keys()
+ # A file holding only a header line has no first row; fall back to the parsed header instead of raising StopIteration.
+ row = next(reader, None)
+ fieldnames = row.keys() if row is not None else reader.fieldnames
return fieldnames
def main(args):
- screen = parse_screen_file(args.screen)
- abricate_report_fieldnames = get_abricate_report_fieldnames(args.abricate_report)
- with open(args.abricate_report) as f:
- reader = csv.DictReader(f, delimiter="\t", quotechar='"')
- writer = csv.DictWriter(sys.stdout, delimiter="\t", quotechar='"', fieldnames=abricate_report_fieldnames)
- writer.writeheader()
- for row in reader:
- for gene in screen:
- if re.search(gene['regex'], row['GENE']):
- writer.writerow(row)
-
+ """Screen args.abricate_report against the genes in args.screening_file, writing args.screened_report and args.gene_detection_status."""
+ screen = parse_screen_file(args.screening_file)
+ abricate_report_fieldnames = get_fieldnames(args.abricate_report)
+ gene_detection_status_fieldnames = ['gene_name', 'detected']
+ with open(args.abricate_report, 'r') as f1, open(args.screened_report, 'w') as f2, open(args.gene_detection_status, 'w') as f3:
+ abricate_report_reader = csv.DictReader(f1, delimiter="\t", quotechar='"')
+ # Read the rows once: rewinding f1 for each gene would make the reader replay the header line as a data row.
+ abricate_report_rows = list(abricate_report_reader)
+ screened_report_writer = csv.DictWriter(f2, delimiter="\t", quotechar='"', fieldnames=abricate_report_fieldnames)
+ gene_detection_status_writer = csv.DictWriter(f3, delimiter="\t", quotechar='"', fieldnames=gene_detection_status_fieldnames)
+ screened_report_writer.writeheader()
+ gene_detection_status_writer.writeheader()
+
+ for gene in screen:
+ gene_detection_status = {
+ 'gene_name': gene['gene_name'],
+ 'detected': False
+ }
+ for abricate_report_row in abricate_report_rows:
+ if re.search(gene['regex'], abricate_report_row['GENE']):
+ gene_detection_status['detected'] = True
+ screened_report_writer.writerow(abricate_report_row)
+ gene_detection_status_writer.writerow(gene_detection_status)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument("--screen", help="TSV file defining genes to screen for")
- parser.add_argument("abricate_report", help="Abricate output")
+ parser.add_argument("abricate_report", help="Input: Abricate report to screen (tsv)")
+ parser.add_argument("--screening_file", help="Input: List of genes to screen for (tsv)")
+ parser.add_argument("--screened_report", help="Output: Screened abricate report including only genes of interest (tsv)")
+ parser.add_argument("--gene_detection_status", help="Output: detection status for all genes listed in the screening file (tsv)")
args = parser.parse_args()
main(args)
diff -r 2ffd23634c1e -r 40003338a8e8 screen_abricate_report.xml
--- a/screen_abricate_report.xml Thu Sep 26 19:37:27 2019 -0400
+++ b/screen_abricate_report.xml Fri Sep 27 13:31:43 2019 -0400
@@ -6,7 +6,8 @@
'${__tool_directory__}/screen_abricate_report.py'
'${abricate_report}'
- --screen '${screening_file.fields.path}'
- > '${output}'
+ --screening_file '${screening_file.fields.path}'
+ --screened_report '${screened_report}'
+ --gene_detection_status '${gene_detection_status}'
]]>
@@ -17,7 +18,8 @@
-
+
+
diff -r 2ffd23634c1e -r 40003338a8e8 test-data/gene_detection_status.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gene_detection_status.tsv Fri Sep 27 13:31:43 2019 -0400
@@ -0,0 +1,4 @@
+gene_name detected
+KPC False
+NDM True
+OXA True
diff -r 2ffd23634c1e -r 40003338a8e8 test-data/screen.tsv
--- a/test-data/screen.tsv Thu Sep 26 19:37:27 2019 -0400
+++ b/test-data/screen.tsv Fri Sep 27 13:31:43 2019 -0400
@@ -1,4 +1,4 @@
-gene regex
+gene_name regex
KPC KPC
NDM NDM
OXA OXA
diff -r 2ffd23634c1e -r 40003338a8e8 test-data/screened_report.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/screened_report.tsv Fri Sep 27 13:31:43 2019 -0400
@@ -0,0 +1,4 @@
+#FILE %IDENTITY END SEQUENCE COVERAGE_MAP %COVERAGE DATABASE ACCESSION START PRODUCT GAPS COVERAGE GENE
+SRR9113479_assembly 100.00 990 32 =============== 100.00 card JX262694:1-814 178 card~~~NDM-7~~~JX262694:1-814 NDM-7 is a beta-lactamase found in Escherichia coli 0/0 1-813/813 NDM-7
+SRR9113479_assembly 99.08 18189 25 ...........==== 23.89 card KT736121.1:1-2722 17540 card~~~OXA-368~~~KT736121.1:1-2722 OXA-368 is a beta-lactamase found in Aeromonas sobria. From the Lahey list of OXA beta-lactamases. 0/0 2072-2721/2721 OXA-368
+SRR9113479_assembly 100.00 19084 25 =============== 100.00 card JN420336.1:1400-2231 18254 card~~~OXA-1~~~JN420336.1:1400-2231 OXA-1 is a beta-lactamase found in E. coli 0/0 1-831/831 OXA-1
diff -r 2ffd23634c1e -r 40003338a8e8 test-data/screening_file.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/screening_file.tsv Fri Sep 27 13:31:43 2019 -0400
@@ -0,0 +1,4 @@
+gene_name regex
+KPC KPC
+NDM NDM
+OXA OXA