changeset 3:1c1c680c70a0 draft default tip

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/screen_abricate_report commit 2ec76aac2fcf466fc16091bfff8b7cb83fd92467-dirty"
author dfornika
date Thu, 02 Jan 2020 21:04:04 +0000
parents 378696e5f81c
children
files screen_abricate_report.py screen_abricate_report.xml test-data/SAMN13042171_abricate_report.tsv test-data/SAMN13042171_abricate_report_screened.tsv test-data/SAMN13042171_gene_detection_status.tsv test-data/abricate_report.tsv test-data/abricate_report_screening_files.loc test-data/gene_detection_status.tsv test-data/screen.tsv test-data/screen.yaml test-data/screened_report.tsv test-data/screening_file.tsv tool_data_table_conf.xml.test
diffstat 13 files changed, 170 insertions(+), 113 deletions(-) [+]
line wrap: on
line diff
--- a/screen_abricate_report.py	Fri Sep 27 13:34:45 2019 -0400
+++ b/screen_abricate_report.py	Thu Jan 02 21:04:04 2020 +0000
@@ -3,11 +3,29 @@
 from __future__ import print_function
 
 import argparse
-import os
+import csv
 import re
-import sys
-import csv
-from pprint import pprint
+
+
+class Range(object):
+    """
+    Used to limit the min_coverage and min_identity args to range 0.0 - 100.0
+    """
+    def __init__(self, start, end):
+        self.start = start
+        self.end = end
+
+    def __eq__(self, other):
+        return self.start <= other <= self.end
+
+    def __contains__(self, item):
+        return self.__eq__(item)
+
+    def __iter__(self):
+        yield self
+
+    def __repr__(self):
+        return  str(self.start) + " - " + str(self.end)
 
 def parse_screen_file(screen_file):
     screen = []
@@ -17,21 +35,35 @@
             screen.append(row)
     return screen
 
+
 def get_fieldnames(input_file):
     with open(input_file) as f:
         reader = csv.DictReader(f, delimiter="\t", quotechar='"')
         row = next(reader)
     fieldnames = row.keys()
     return fieldnames
-    
+
+def detect_gene(abricate_report_row, regex, min_coverage, min_identity):
+    gene_of_interest = bool(re.search(regex, abricate_report_row['GENE']))
+    sufficient_coverage = float(abricate_report_row['%COVERAGE']) >= min_coverage
+    sufficient_identity = float(abricate_report_row['%IDENTITY']) >= min_identity
+    if gene_of_interest and sufficient_coverage and sufficient_identity:
+        return True
+    else:
+        return False
+
+
 def main(args):
     screen = parse_screen_file(args.screening_file)
-    abricate_report_fieldnames = get_fieldnames(args.abricate_report)
     gene_detection_status_fieldnames = ['gene_name', 'detected']
-    with open(args.abricate_report, 'r') as f1, open(args.screened_report, 'w') as f2, open(args.gene_detection_status, 'w') as f3:
+    with open(args.abricate_report, 'r') as f1, \
+            open(args.screened_report, 'w') as f2, \
+            open(args.gene_detection_status, 'w') as f3:
         abricate_report_reader = csv.DictReader(f1, delimiter="\t", quotechar='"')
-        screened_report_writer = csv.DictWriter(f2, delimiter="\t", quotechar='"', fieldnames=abricate_report_fieldnames)
-        gene_detection_status_writer = csv.DictWriter(f3, delimiter="\t", quotechar='"', fieldnames=gene_detection_status_fieldnames)
+        screened_report_writer = csv.DictWriter(f2, delimiter="\t", quotechar='"',
+                                                fieldnames=abricate_report_reader.fieldnames)
+        gene_detection_status_writer = csv.DictWriter(f3, delimiter="\t", quotechar='"',
+                                                      fieldnames=gene_detection_status_fieldnames)
         screened_report_writer.writeheader()
         gene_detection_status_writer.writeheader()
 
@@ -41,18 +73,20 @@
                 'detected': False
             }
             for abricate_report_row in abricate_report_reader:
-                if re.search(gene['regex'], abricate_report_row['GENE']):
+                if detect_gene(abricate_report_row, gene['regex'], args.min_coverage, args.min_identity):
                     gene_detection_status['detected'] = True
                     screened_report_writer.writerow(abricate_report_row)
             gene_detection_status_writer.writerow(gene_detection_status)
-            f1.seek(0) # return file pointer to start of abricate report
-
+            f1.seek(0)  # return file pointer to start of abricate report
+            next(abricate_report_reader)
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument("abricate_report", help="Input: Abricate report to screen (tsv)")
     parser.add_argument("--screening_file", help="Input: List of genes to screen for (tsv)")
-    parser.add_argument("--screened_report", help="Output: Screened abricate report including only genes of interest (tsv)")
-    parser.add_argument("--gene_detection_status", help="Output: detection status for all genes listed in the screening file (tsv)")
+    parser.add_argument("--screened_report", help=("Output: Screened abricate report, including only genes of interest (tsv)"))
+    parser.add_argument("--gene_detection_status", help=("Output: detection status for all genes listed in the screening file (tsv)"))
+    parser.add_argument("--min_coverage", type=float,  default=90.0, choices=Range(0.0, 100.0), help=("Minimum percent coverage"))
+    parser.add_argument("--min_identity", type=float, default=90.0, choices=Range(0.0, 100.0), help=("Minimum percent identity"))
     args = parser.parse_args()
     main(args)
--- a/screen_abricate_report.xml	Fri Sep 27 13:34:45 2019 -0400
+++ b/screen_abricate_report.xml	Thu Jan 02 21:04:04 2020 +0000
@@ -1,33 +1,76 @@
-<tool id="screen_abricate_report" name="Screen Abricate Report" version="0.1.0">
+<tool id="screen_abricate_report" name="Screen Abricate Report" version="0.3.0">
     <description>Screens an abricate report for genes of interest</description>
     <requirements>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
         '${__tool_directory__}/screen_abricate_report.py'
              '${abricate_report}'
-             --screening_file '${screening_file.fields.path}'
+             #if str( $screening_file_source.screening_file_source_selector ) == "tool_data_table":
+               #set $input_screening_file = str( $screening_file_source.screening_file.fields.path )
+             #else:
+               #set $input_screening_file = str( $screening_file_source.screening_file )
+             #end if
+             --screening_file '${input_screening_file}'
              --screened_report '${screened_report}'
              --gene_detection_status '${gene_detection_status}'
+             --min_coverage '${min_coverage}'
+             --min_identity '${min_identity}' &&
+             cp '${input_screening_file}' '${output_screening_file}'
     ]]></command>
     <inputs>
+        <conditional name="screening_file_source">
+            <param name="screening_file_source_selector" type="select" label="Select a gene screening file from your history or use one from a tool data table?"
+                   help="Screening files must be stored in the 'abricate_report_screening_files' tool data table">
+                <option value="tool_data_table">From tool data table</option>
+                <option value="history">From history</option>
+            </param>
+            <when value="tool_data_table">
+                <param name="screening_file" type="select" format="tabular">
+	            <options from_data_table="abricate_report_screening_files">
+	                <validator type="no_options" message="No abricate report screening files are available" />
+                    </options>
+	        </param>
+            </when>
+            <when value="history">
+                <param name="screening_file" type="data" format="tabular" label="Gene screening file" help="A two-column tab-delimited file with gene names and regular expressions" />
+            </when>
+        </conditional>
         <param name="abricate_report" type="data" format="tabular" />
-        <param name="screening_file" type="select" format="tabular">
-	    <options from_data_table="abricate_report_screening_files">
-	        <validator type="no_options" message="No abricate report screening files are available" />
-            </options>
-	</param>
+        <param name="min_coverage" type="float" min="0.0" value="90.0" max="100.0" />
+        <param name="min_identity" type="float" min="0.0" value="90.0" max="100.0" />
     </inputs>
     <outputs>
-        <data name="screened_report" type="data" format="tabular" label="Screened Abricate Report" />
-        <data name="gene_detection_status" type="data" format="tabular" label="Gene Detection Status" />
+        <data name="screened_report" format="tabular" label="Screened Abricate Report" />
+        <data name="gene_detection_status" format="tabular" label="Gene Detection Status" />
+        <data name="output_screening_file" format="tabular" />
     </outputs>
     <tests>
         <test>
-            <param name="abricate_report" value="abricate_report.tsv"/>
-            <param name="screen" value="screen.tsv"/>
+            <param name="screening_file_source.screening_file_source_selector" value="tool_data_table" />
+            <param name="screening_file_source.screening_file" value="test_entry"/>
+            <param name="abricate_report" value="SAMN13042171_abricate_report.tsv"/>
+	    <output name="screened_report" file="SAMN13042171_abricate_report_screened.tsv" ftype="tabular"/>
+	    <output name="gene_detection_status" file="SAMN13042171_gene_detection_status.tsv" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="screening_file_source.screening_file_source_selector" value="history" />
+            <param name="screening_file_source.screening_file" value="screening_file.tsv"/>
+            <param name="abricate_report" value="SAMN13042171_abricate_report.tsv"/>
+	    <output name="screened_report" file="SAMN13042171_abricate_report_screened.tsv" ftype="tabular"/>
+	    <output name="gene_detection_status" file="SAMN13042171_gene_detection_status.tsv" ftype="tabular"/>
         </test>
     </tests>
     <help><![CDATA[
+    This tool is used to filter (ie. 'screen') an abricate report to only include 
+    specific genes of interest. 
+
+    A list of genes is supplied in a 'screening file', which is a two-column .tsv 
+    with headers 'gene_name' and 'regex'. The first column gives the name of the 
+    gene of interest, and the second column is a regular expression that can be used 
+    to identify examples of that gene in the 'GENE' column of the abricate report.
+
+    For example, one might use the regex '^KPC-\d+$' to identify all alleles of the
+    KPC gene (KPC-2, KPC-3, KPC-4, ..., KPC-10, ...)
     ]]></help>
     <citations>
     </citations>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SAMN13042171_abricate_report.tsv	Thu Jan 02 21:04:04 2020 +0000
@@ -0,0 +1,47 @@
+#FILE	SEQUENCE	START	END	STRAND	GENE	COVERAGE	COVERAGE_MAP	GAPS	%COVERAGE	%IDENTITY	DATABASE	ACCESSION	PRODUCT	RESISTANCE
+SAMN13042171.fasta	contig00001	181303	181975	+	kdpE	1-673/678	===============	0/0	99.26	76.08	card	U00096.3:721056-721734	kdpE is a transcriptional activator that is part of the two-component system KdpD/KdpE that is studied for its regulatory role in potassium transport and has been identified as an adaptive regulator involved in the virulence and intracellular survival of pathogenic bacteria. kdpE regulates a range of virulence loci through direct promoter binding.	aminoglycoside
+SAMN13042171.fasta	contig00001	362878	363231	-	ramA	21-375/375	========/======	1/1	94.40	80.56	card	JQ727668:1-376	RamA (resistance antibiotic multiple) is a positive regulator of AcrAB-TolC and leads to high level multidrug resistance in Klebsiella pneumoniae Salmonella enterica and Enterobacter aerugenes increasing the expression of both the mar operon as well as AcrAB. RamA also decreases OmpF expression.	cephamycin/aminocoumarin/aminoglycoside/carbapenem/fluoroquinolone/cephalosporin/triclosan/tetracycline/acridine_dye/phenicol/monobactam/glycylcycline/antibacterial_free_fatty_acids/penem/diaminopyrimidine/penam/macrolide/rifamycin
+SAMN13042171.fasta	contig00001	568102	569295	+	Klebsiella_pneumoniae_acrA	1-1197/1197	========/======	2/3	99.75	95.07	card	AJ318073.1:794-1991	AcrA is a subunit of the AcrAB multidrug efflux system that in K. pneumoniae which is encoded by the acrRAB operon.	aminocoumarin/carbapenem/aminoglycoside/triclosan/tetracycline/acridine_dye/cephalosporin/fluoroquinolone/monobactam/glycylcycline/phenicol/diaminopyrimidine/antibacterial_free_fatty_acids/macrolide/penam/rifamycin
+SAMN13042171.fasta	contig00001	569318	572462	+	acrB	1-3148/3150	========/======	1/3	99.84	83.74	card	U00096.3:481254-484404	Protein subunit of AcrA-AcrB-TolC multidrug efflux complex. AcrB functions as a herterotrimer which forms the inner membrane component and is primarily responsible for substrate recognition and energy transduction by acting as a drug/proton antiporter.	antibacterial_free_fatty_acids/diaminopyrimidine/rifamycin/penam/macrolide/aminoglycoside/carbapenem/aminocoumarin/phenicol/monobactam/glycylcycline/fluoroquinolone/cephalosporin/tetracycline/triclosan/acridine_dye
+SAMN13042171.fasta	contig00001	698224	699350	+	Escherichia_coli_ampH	1-1127/1158	===============	0/0	97.32	77.46	card	AP012030.1:395554-396712	AmpH is a class C ampC-like beta-lactamase and penicillin-binding protein identified in Escherichia coli.	cephalosporin/penam
+SAMN13042171.fasta	contig00002	345289	346827	-	Klebsiella_pneumoniae_KpnH	1-1539/1539	===============	0/0	100.00	84.41	card	ASTU01000063.1:61249-62788	KpnH consists of ~511 residues resembles EmrB of E. coli and is probably a translocase in the KpnGH-TolC efflux protein in K. pneumoniae. Disruption of the pump components KpnG-KpnH signficantly decrease resistance to azithromycin ceftazidime ciprofloxacin ertapenem erythromycin gentamicin imipenem ticarcillin norfloxacin polymyxin-B piperacillin spectinomycin tobramycin and streptomycin.	isoniazid/lincosamide/benzalkonium_chloride/diaminopyrimidine/penem/bicyclomycin/antibacterial_free_fatty_acids/macrolide/penam/nucleoside/nitroimidazole/peptide/rhodamine/acridine_dye/oxazolidinone/cephalosporin/fluoroquinolone/phenicol/fosfomycin/rifamycin/carbapenem/aminoglycoside/tetracycline/glycylcycline
+SAMN13042171.fasta	contig00002	346843	348015	-	Klebsiella_pneumoniae_KpnG	1-1173/1173	===============	0/0	100.00	94.54	card	ACWO01000051.1:22093-23266	KpnG consists of ~390 residues and resembles EmrA of E. coli. Disruption of the pump components KpnG-KpnH signficantly decrease resistance to azithromycin ceftazidime ciprofloxacin ertapenem erythromycin gentamicin imipenem ticarcillin norfloxacin polymyxin-B piperacillin spectinomycin tobramycin and streptomycin	penam/macrolide/nitroimidazole/nucleoside/benzalkonium_chloride/lincosamide/isoniazid/bicyclomycin/antibacterial_free_fatty_acids/diaminopyrimidine/penem/fluoroquinolone/cephalosporin/acridine_dye/oxazolidinone/fosfomycin/phenicol/rhodamine/peptide/rifamycin/tetracycline/glycylcycline/aminoglycoside/carbapenem
+SAMN13042171.fasta	contig00002	348162	348671	-	emrR	1-510/531	===============	0/0	96.05	83.33	card	U00096.3:2810770-2811301	EmrR is a negative regulator for the EmrAB-TolC multidrug efflux pump in E. coli. Mutations lead to EmrAB-TolC overexpression.	rifamycin/nitroimidazole/nucleoside/macrolide/penam/bicyclomycin/antibacterial_free_fatty_acids/diaminopyrimidine/lincosamide/isoniazid/benzalkonium_chloride/fosfomycin/glycylcycline/phenicol/cephalosporin/fluoroquinolone/oxazolidinone/acridine_dye/tetracycline/rhodamine/peptide
+SAMN13042171.fasta	contig00002	389327	392479	-	oqxB	1-3153/3153	===============	0/0	100.00	95.53	card	EU370913.1:47851-51004	RND efflux pump conferring resistance to fluoroquinolone	tetracycline/triclosan/acridine_dye/fluoroquinolone/penam/nitrofuran/macrolide/phenicol/monobactam/glycylcycline/aminocoumarin/aminoglycoside/carbapenem/diaminopyrimidine/antibacterial_free_fatty_acids
+SAMN13042171.fasta	contig00002	392503	393678	-	oqxA	1-1176/1176	===============	0/0	100.00	93.71	card	EU370913.1:46652-47828	RND efflux pump conferring resistance to fluoroquinolone	phenicol/monobactam/glycylcycline/penam/fluoroquinolone/nitrofuran/macrolide/tetracycline/triclosan/acridine_dye/antibacterial_free_fatty_acids/aminoglycoside/carbapenem/diaminopyrimidine/aminocoumarin
+SAMN13042171.fasta	contig00003	27739	29154	-	mdtK	1-1416/1425	========/======	2/2	99.30	76.71	card	CP014358.1:2161326-2162751	A multidrug and toxic compound extrusions (MATE) transporter conferring resistance to norfloxacin doxorubicin and acriflavine.	glycylcycline/fluoroquinolone/tetracycline/acridine_dye
+SAMN13042171.fasta	contig00003	144300	145294	+	mdtA	178-1172/1248	.=======/======	2/2	79.65	78.51	card	U00096:2154016-2155264	MdtA is the membrane fusion protein of the multidrug efflux complex mdtABC.	antibacterial_free_fatty_acids/aminoglycoside/diaminopyrimidine/carbapenem/aminocoumarin/phenicol/glycylcycline/monobactam/penam/fluoroquinolone/macrolide/acridine_dye/triclosan/tetracycline
+SAMN13042171.fasta	contig00003	145364	148486	+	mdtB	1-3123/3123	========/======	2/4	99.94	80.90	card	U00096:2155263-2158386	MdtB is a transporter that forms a heteromultimer complex with MdtC to form a multidrug transporter. MdtBC is part of the MdtABC-TolC efflux complex.	aminocoumarin/aminoglycoside/diaminopyrimidine/carbapenem/antibacterial_free_fatty_acids/acridine_dye/tetracycline/triclosan/penam/fluoroquinolone/macrolide/phenicol/glycylcycline/monobactam
+SAMN13042171.fasta	contig00003	148487	151564	+	mdtC	1-3078/3078	===============	0/0	100.00	82.00	card	U00096:2158386-2161464	MdtC is a transporter that forms a heteromultimer complex with MdtB to form a multidrug transporter. MdtBC is part of the MdtABC-TolC efflux complex. In the absence of MdtB MdtC can form a homomultimer complex that results in a functioning efflux complex with a narrower drug specificity. mdtC corresponds to 3 loci in Pseudomonas aeruginosa PAO1 (gene name: muxC/muxB) and 3 loci in Pseudomonas aeruginosa LESB58.	diaminopyrimidine/carbapenem/aminoglycoside/antibacterial_free_fatty_acids/aminocoumarin/glycylcycline/monobactam/phenicol/acridine_dye/triclosan/tetracycline/macrolide/fluoroquinolone/penam
+SAMN13042171.fasta	contig00003	152979	153244	+	baeS	2-267/1404	===............	0/0	18.95	80.45	card	AP009048:2165013-2166417	BaeS is a sensor kinase in the BaeSR regulatory system. While it phosphorylates BaeR to increase its activity BaeS is not necessary for overexpressed BaeR to confer resistance.	aminocoumarin/carbapenem/diaminopyrimidine/aminoglycoside/antibacterial_free_fatty_acids/tetracycline/triclosan/acridine_dye/macrolide/fluoroquinolone/penam/monobactam/glycylcycline/phenicol
+SAMN13042171.fasta	contig00003	154462	155174	+	baeR	1-713/723	===============	0/0	98.62	82.61	card	AP009048.1:2166413-2167136	BaeR is a response regulator that promotes the expression of MdtABC and AcrD efflux complexes.	phenicol/monobactam/glycylcycline/tetracycline/triclosan/acridine_dye/penam/fluoroquinolone/macrolide/aminoglycoside/carbapenem/diaminopyrimidine/antibacterial_free_fatty_acids/aminocoumarin
+SAMN13042171.fasta	contig00003	277916	279555	-	yojI	1-1640/1644	========/======	6/6	99.57	75.65	card	U00096.3:2306972-2308616	YojI mediates resistance to the peptide antibiotic microcin J25 when it is expressed from a multicopy vector. YojI is capable of pumping out microcin molecules.  The outer membrane protein TolC in addition to YojI is required for export of microcin J25 out of the cell. Microcin J25 is thus the first known substrate for YojI.	peptide/nitroimidazole/rifamycin/pleuromutilin/penam/fluoroquinolone/cephalosporin/macrolide/tetracycline/acridine_dye
+SAMN13042171.fasta	contig00004	25617	26032	+	FosA5	1-416/420	===============	0/0	99.05	93.51	card	KP143090.1:1200-1620	fosA5 is a fosfomycin resistance gene isolated from clinical strain of Escherichia coli E265. It is susceptible to amikacin tetracycline and imipenem and resistant to sulphonamide cephalosporins gentamicin ciprofloxacin chloramphenicol and streptomycin	fosfomycin
+SAMN13042171.fasta	contig00006	173	577	-	H-NS	1-405/414	===============	0/0	97.83	87.16	card	BA000007.3:1737691-1738105	H-NS is a histone-like protein involved in global gene regulation in Gram-negative bacteria. It is a repressor of the membrane fusion protein genes acrE mdtE and emrK as well as nearby genes of many RND-type multidrug exporters.	bicyclomycin/antibacterial_free_fatty_acids/diaminopyrimidine/benzalkonium_chloride/isoniazid/lincosamide/nitroimidazole/rifamycin/nucleoside/penam/macrolide/aminoglycoside/carbapenem/rhodamine/cephamycin/aminocoumarin/peptide/fosfomycin/phenicol/glycylcycline/monobactam/fluoroquinolone/cephalosporin/acridine_dye/oxazolidinone/triclosan/tetracycline
+SAMN13042171.fasta	contig00007	21771	22687	-	pmrF	16-932/969	===============	0/0	94.63	76.77	card	U00096:2367071-2368040	PmrF is required for the synthesis and transfer of 4-amino-4-deoxy-L-arabinose (Ara4N) to Lipid A which allows gram-negative bacteria to resist the antimicrobial activity of cationic antimicrobial peptides and antibiotics such as polymyxin. pmrF corresponds to 1 locus in Pseudomonas aeruginosa PAO1 and 1 locus in Pseudomonas aeruginosa LESB58.	peptide
+SAMN13042171.fasta	contig00008	29896	31020	-	Klebsiella_pneumoniae_OmpK37	1-1125/1125	===============	0/0	100.00	92.71	card	AJ011502.1:301-1426	Klebsiella pneumoniae outer membrane porin protein. Is preferentially detected in porin-deficient strains. Functional characterization of this new porin revealed a narrower pore than those of porins OmpK35 and OmpK36 which did not allow penetration by certain ##-lactams. Also when a resistant strain expresses porin OmpK37 is less susceptible to cefotaxime and cefoxitin than when it is expressing either OmpK36 or OmpK35.	cephalosporin/penam/cephamycin/monobactam/carbapenem/penem
+SAMN13042171.fasta	contig00008	175297	175659	+	Klebsiella_pneumoniae_KpnE	1-363/363	===============	0/0	100.00	88.43	card	AP006725.1:2483890-2484253	KpnE subunit of KpnEF resembles EbrAB from E. coli. Mutation in KpnEF resulted in increased susceptibility to cefepime ceftriaxon colistin erythromycin rifampin tetracycline and streptomycin as well as enhanced sensitivity toward sodium dodecyl sulfate deoxycholate dyes benzalkonium chloride chlorhexidine and triclosan	diaminopyrimidine/bicyclomycin/antibacterial_free_fatty_acids/benzalkonium_chloride/isoniazid/lincosamide/nucleoside/nitroimidazole/rifamycin/penam/macrolide/aminoglycoside/peptide/rhodamine/phenicol/glycylcycline/fosfomycin/oxazolidinone/acridine_dye/tetracycline/fluoroquinolone/cephalosporin
+SAMN13042171.fasta	contig00008	175646	175975	+	Klebsiella_pneumoniae_KpnF	1-330/330	===============	0/0	100.00	92.42	card	AP006725.1:2484239-2484569	KpnF subunit of KpnEF resembles EbrAB from E. coli. Mutation in KpnEF resulted in increased susceptibility to cefepime ceftriaxon colistin erythromycin rifampin tetracycline and streptomycin as well as enhanced sensitivity toward sodium dodecyl sulfate deoxycholate dyes benzalkonium chloride chlorhexidine and triclosan.	fosfomycin/glycylcycline/phenicol/cephalosporin/fluoroquinolone/acridine_dye/oxazolidinone/tetracycline/aminoglycoside/rhodamine/peptide/rifamycin/nitroimidazole/nucleoside/macrolide/penam/bicyclomycin/antibacterial_free_fatty_acids/diaminopyrimidine/isoniazid/lincosamide/benzalkonium_chloride
+SAMN13042171.fasta	contig00008	217869	218728	-	OKP-B-18	1-860/861	===============	0/0	99.88	99.53	card	AM850920:41-902	OKP-B-18 is a beta-lactamase found in Klebsiella pneumoniae	cephalosporin/penam
+SAMN13042171.fasta	contig00009	93196	94580	+	tolC	1-1403/1488	========/======	2/18	93.08	78.40	card	FJ768952:1-1489	TolC is a protein subunit of many multidrug efflux complexes in Gram negative bacteria. It is an outer membrane efflux protein and is constitutively open. Regulation of efflux activity is often at its periplasmic entrance by other components of the efflux complex.	rifamycin/triclosan/tetracycline/glycylcycline/cephamycin/carbapenem/aminoglycoside/macrolide/penam/pleuromutilin/nitroimidazole/nucleoside/isoniazid/lincosamide/benzalkonium_chloride/antibacterial_free_fatty_acids/bicyclomycin/penem/diaminopyrimidine/cephalosporin/fluoroquinolone/oxazolidinone/acridine_dye/fosfomycin/monobactam/phenicol/rhodamine/peptide/aminocoumarin
+SAMN13042171.fasta	contig00009	108077	108898	-	bacA	1-822/822	===============	0/0	100.00	80.53	card	U00096.3:3203310-3204132	The bacA gene product (BacA) recycles undecaprenyl pyrophosphate during cell wall biosynthesis which confers resistance to bacitracin.	peptide
+SAMN13042171.fasta	contig00012	114632	116380	-	msbA	1-1749/1749	===============	0/0	100.00	80.96	card	U00096.3:966621-968370	MsbA is a multidrug resistance transporter homolog from E. coli and belongs to a superfamily of transporters that contain an adenosine triphosphate (ATP) binding cassette (ABC) which is also called a nucleotide-binding domain (NBD). MsbA is a member of the MDR-ABC transporter group by sequence homology. MsbA transports lipid A a major component of the bacterial outer cell membrane and is the only bacterial ABC transporter that is essential for cell viability.	peptide/rifamycin/pleuromutilin/nitroimidazole/tetracycline/acridine_dye/cephalosporin/macrolide/penam/fluoroquinolone
+SAMN13042171.fasta	contig00012	155728	155882	-	macB	1586-1740/1935	............==.	0/0	8.01	76.13	card	AY768532:1-1936	MacB is an ATP-binding cassette (ABC) transporter that exports macrolides with 14- or 15- membered lactones. It forms an antibiotic efflux complex with MacA and TolC. macB corresponds to 1 locus in Pseudomonas aeruginosa PAO1 and 1 locus in Pseudomonas aeruginosa LESB58.	macrolide/cephalosporin/fluoroquinolone/penam/acridine_dye/tetracycline/pleuromutilin/rifamycin/nitroimidazole/peptide
+SAMN13042171.fasta	contig00015	140084	143160	+	acrF	1-3080/3105	========/======	7/9	99.00	76.74	card	U00096:3415033-3418138	AcrF is a inner membrane transporter similar to AcrB.	aminoglycoside/carbapenem/diaminopyrimidine/antibacterial_free_fatty_acids/aminocoumarin/cephamycin/phenicol/monobactam/glycylcycline/tetracycline/triclosan/acridine_dye/fluoroquinolone/penam/cephalosporin/macrolide
+SAMN13042171.fasta	contig00016	114383	115015	-	CRP	1-633/633	===============	0/0	100.00	87.36	card	AP009048.1:4153664-4154297	CRP is a global regulator that represses MdtEF multidrug efflux pump expression.	tetracycline/triclosan/acridine_dye/fluoroquinolone/penam/macrolide/phenicol/monobactam/glycylcycline/aminocoumarin/aminoglycoside/carbapenem/diaminopyrimidine/antibacterial_free_fatty_acids
+SAMN13042171.fasta	contig00018	59247	60613	-	cpxA	1-1367/1374	===============	0/0	99.49	81.71	card	BA000007.3:4903689-4905063	CpxA is a membrane-localized sensor kinase that is activated by envelope stress. It starts a kinase cascade that activates CpxR which promotes efflux complex expression.	acridine_dye/tetracycline/triclosan/macrolide/fluoroquinolone/penam/glycylcycline/monobactam/phenicol/aminocoumarin/diaminopyrimidine/carbapenem/aminoglycoside/antibacterial_free_fatty_acids
+SAMN13042171.fasta	contig00019	55611	58724	+	acrD	1-3114/3114	========/======	2/2	99.97	80.58	card	AP009048.1:2586251-2589365	AcrD is an aminoglycoside efflux pump expressed in E. coli. Its expression can be induced by indole and is regulated by baeRS and cpxAR.	aminocoumarin/aminoglycoside/carbapenem/diaminopyrimidine/antibacterial_free_fatty_acids/tetracycline/triclosan/acridine_dye/fluoroquinolone/penam/macrolide/phenicol/monobactam/glycylcycline
+SAMN13042171.fasta	contig00024	52994	54169	+	mdtH	1-1176/1209	===============	0/0	97.27	77.04	card	U00096:1124118-1125327	Multidrug resistance protein MdtH	nitroimidazole/rifamycin/nucleoside/penam/macrolide/antibacterial_free_fatty_acids/bicyclomycin/diaminopyrimidine/benzalkonium_chloride/isoniazid/lincosamide/fosfomycin/phenicol/glycylcycline/fluoroquinolone/cephalosporin/tetracycline/acridine_dye/oxazolidinone/rhodamine/peptide
+SAMN13042171.fasta	contig00024	60718	61881	+	mdtG	18-1181/1227	========/======	3/8	94.54	76.03	card	CP000800.1:1191728-1192955	The MdtG protein also named YceE appears to be a member of the major facilitator superfamily of transporters and it has been reported when overexpressed to increase fosfomycin and deoxycholate resistances. mdtG is a member of the marA-soxS-rob regulon.	macrolide/penam/rifamycin/nitroimidazole/nucleoside/isoniazid/lincosamide/benzalkonium_chloride/bicyclomycin/antibacterial_free_fatty_acids/diaminopyrimidine/cephalosporin/fluoroquinolone/acridine_dye/oxazolidinone/tetracycline/fosfomycin/glycylcycline/phenicol/rhodamine/peptide
+SAMN13042171.fasta	contig00025	3007	3375	-	marA	1-369/384	===============	0/0	96.09	81.57	card	AP009048.1:1621288-1621672	In the presence of antibiotic stress E. coli overexpresses the global activator protein MarA which besides inducing MDR efflux pump AcrAB also down- regulates synthesis of the porin OmpF.	antibacterial_free_fatty_acids/penem/diaminopyrimidine/macrolide/penam/rifamycin/cephamycin/aminocoumarin/carbapenem/aminoglycoside/cephalosporin/fluoroquinolone/tetracycline/triclosan/acridine_dye/monobactam/glycylcycline/phenicol
+SAMN13042171.fasta	contig00028	4483	5688	+	Escherichia_coli_mdfA	1-1206/1233	========/======	2/2	97.73	78.21	card	JQ394987:1-1234	Multidrug efflux pump in E. coli. This multidrug efflux system was originally identified as the Cmr/CmlA chloramphenicol exporter.	antibacterial_free_fatty_acids/bicyclomycin/diaminopyrimidine/isoniazid/lincosamide/benzalkonium_chloride/rifamycin/nitroimidazole/nucleoside/macrolide/penam/rhodamine/peptide/fosfomycin/glycylcycline/phenicol/cephalosporin/fluoroquinolone/tetracycline/acridine_dye/oxazolidinone
+SAMN13042171.fasta	contig00030	26098	26477	-	Nocardia_rifampin_resistant_beta-subunit_of_RNA_polymerase_(rpoB2)	2980-3359/3489	......../...===	2/2	10.86	76.11	card	AP006618.1:4835200-4838689	Due to gene duplication the genomes of Nocardia species include both rifampin-sensitive beta-subunit of RNA polymerase (rpoB) and rifampin-resistant beta-subunit of RNA polymerase (rpoB2) genes with ~88% similarity between the two gene products. Expression of the rpoB2 variant results in replacement of rifampin sensitivity with rifampin resistance.	rifamycin
+SAMN13042171.fasta	contig00030	26758	26924	-	Nocardia_rifampin_resistant_beta-subunit_of_RNA_polymerase_(rpoB2)	2605-2771/3489	...........=...	0/0	4.79	75.45	card	AP006618.1:4835200-4838689	Due to gene duplication the genomes of Nocardia species include both rifampin-sensitive beta-subunit of RNA polymerase (rpoB) and rifampin-resistant beta-subunit of RNA polymerase (rpoB2) genes with ~88% similarity between the two gene products. Expression of the rpoB2 variant results in replacement of rifampin sensitivity with rifampin resistance.	rifamycin
+SAMN13042171.fasta	contig00035	7853	8713	-	TEM-150	1-861/861	===============	0/0	100.00	99.88	card	AM183304:209-1070	TEM-150 is a beta-lactamase found in Enterobacter spp. E. coli and Klebsiella pneumoniae.	penem/monobactam/cephalosporin/penam
+SAMN13042171.fasta	contig00035	9413	10237	-	OXA-9	1-825/825	===============	0/0	100.00	100.00	card	M55547:1-826	OXA-9 is a beta-lactamase found in Klebsiella pneumoniae.	cephalosporin/penam
+SAMN13042171.fasta	contig00035	10297	11098	-	ANT(3'')-IIa	168-972/972	..======/======	1/3	82.51	99.25	card	X02340.1:223-1195	ANT(3'')-IIa is a aminoglycoside nucleotidyltransferase identified in Acinetobacter spp. via horizontal gene transfer mechanisms.	aminoglycoside
+SAMN13042171.fasta	contig00035	10991	11741	-	AAC(6')-Ib7	234-980/980	...=====/======	2/4	76.22	96.67	card	Y11946.1:1-981	AAC(6')-Ib7 is a plasmid-encoded aminoglycoside acetyltransferase in E. cloacae and C. freundii	aminoglycoside
+SAMN13042171.fasta	contig00035	14582	15457	+	CTX-M-15	1-876/876	===============	0/0	100.00	100.00	card	AY044436:1436-2312	CTX-M-15 is a beta-lactamase found in the Enterobacteriaceae family	cephalosporin
+SAMN13042171.fasta	contig00035	20098	20754	+	QnrS1	1-657/657	===============	0/0	100.00	100.00	card	DQ485529.1:1-658	QnrS1 is a plasmid-mediated quinolone resistance protein found in Shigella flexneri	fluoroquinolone
+SAMN13042171.fasta	contig00063	2129	2494	-	determinant_of_bleomycin_resistance	1-366/366	===============	0/0	100.00	100.00	card	KC503911.1:9884-10250	A novel bleomycin resistance protein encoded by a metallo-beta-lactamase-associated ble gene. Expression of BRP(MBL) confers resistance to bleomycin and bleomycin-like antibiotics in Enterobacteriaceae and Acinetobacter where it is co-expressed with an MBL and controlled by the same promoter region.	glycopeptide
+SAMN13042171.fasta	contig00063	2498	3310	-	NDM-1	1-813/813	===============	0/0	100.00	100.00	card	FN396876:2407-3220	NDM-1 is a metallo-beta-lactamase isolated from Klebsiella pneumoniae with nearly complete resistance to all beta-lactam antibiotics.	cephalosporin/penam/carbapenem/cephamycin
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SAMN13042171_abricate_report_screened.tsv	Thu Jan 02 21:04:04 2020 +0000
@@ -0,0 +1,3 @@
+#FILE	SEQUENCE	START	END	STRAND	GENE	COVERAGE	COVERAGE_MAP	GAPS	%COVERAGE	%IDENTITY	DATABASE	ACCESSION	PRODUCT	RESISTANCE
+SAMN13042171.fasta	contig00063	2498	3310	-	NDM-1	1-813/813	===============	0/0	100.00	100.00	card	FN396876:2407-3220	NDM-1 is a metallo-beta-lactamase isolated from Klebsiella pneumoniae with nearly complete resistance to all beta-lactam antibiotics.	cephalosporin/penam/carbapenem/cephamycin
+SAMN13042171.fasta	contig00035	9413	10237	-	OXA-9	1-825/825	===============	0/0	100.00	100.00	card	M55547:1-826	OXA-9 is a beta-lactamase found in Klebsiella pneumoniae.	cephalosporin/penam
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SAMN13042171_gene_detection_status.tsv	Thu Jan 02 21:04:04 2020 +0000
@@ -0,0 +1,5 @@
+gene_name	detected
+KPC	False
+NDM	True
+OXA	True
+rpoB2	False
--- a/test-data/abricate_report.tsv	Fri Sep 27 13:34:45 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,63 +0,0 @@
-#FILE	SEQUENCE	START	END	GENE	COVERAGE	COVERAGE_MAP	GAPS	%COVERAGE	%IDENTITY	DATABASE	ACCESSION	PRODUCT
-SRR9113479_assembly	1	188844	190331	tolC	1-1488/1488	===============	0/0	100.00	99.73	card	FJ768952:1-1489	card~~~tolC~~~FJ768952:1-1489 TolC is a protein subunit of many multidrug efflux complexes in Gram negative bacteria. It is an outer membrane efflux protein and is constitutively open. Regulation of efflux activity is often at its periplasmic entrance by other components of the efflux complex.
-SRR9113479_assembly	1	212707	213528	bacA	1-822/822	===============	0/0	100.00	98.66	card	U00096.3:3203310-3204132	card~~~bacA~~~U00096.3:3203310-3204132 The bacA gene product (BacA) recycles undecaprenyl pyrophosphate during cell wall biosynthesis which confers resistance to bacitracin.
-SRR9113479_assembly	10	141472	142104	CRP	1-633/633	===============	0/0	100.00	99.21	card	AP009048.1:4153664-4154297	card~~~CRP~~~AP009048.1:4153664-4154297 CRP is a global regulator that represses MdtEF multidrug efflux pump expression.
-SRR9113479_assembly	11	5292	8396	acrF	1-3105/3105	===============	0/0	100.00	99.13	card	U00096:3415033-3418138	card~~~acrF~~~U00096:3415033-3418138 AcrF is a inner membrane transporter similar to AcrB.
-SRR9113479_assembly	11	8408	9565	acrE	1-1158/1158	===============	0/0	100.00	98.96	card	U00096:3413864-3415022	card~~~acrE~~~U00096:3413864-3415022 AcrE is a membrane fusion protein similar to AcrA.
-SRR9113479_assembly	11	9964	10626	acrS	1-663/663	===============	0/0	100.00	99.25	card	U00096:3412803-3413466	card~~~acrS~~~U00096:3412803-3413466 AcrS is a repressor of the AcrAB efflux complex and is associated with the expression of AcrEF. AcrS is believed to regulate a switch between AcrAB and AcrEF efflux.
-SRR9113479_assembly	12	1	422	evgS	1-422/3594	==.............	0/0	11.74	98.58	card	U00096:2484374-2487968	card~~~evgS~~~U00096:2484374-2487968 EvgS is a sensor protein that phosphorylates the regulatory protein EvgA. evgS corresponds to 1 locus in Pseudomonas aeruginosa PAO1 and 1 locus in Pseudomonas aeruginosa LESB58.
-SRR9113479_assembly	12	427	1041	evgA	1-615/615	===============	0/0	100.00	99.02	card	BA000007.3:3212026-3212641	card~~~evgA~~~BA000007.3:3212026-3212641 EvgA when phosphorylated is a positive regulator for efflux protein complexes emrKY and mdtEF. While usually phosphorylated in a EvgS dependent manner it can be phosphorylated in the absence of EvgS when overexpressed.
-SRR9113479_assembly	12	1565	2620	emrK	1-1056/1056	===============	0/0	100.00	99.72	card	D78168:537-1593	card~~~emrK~~~D78168:537-1593 emrK is a membrane fusion protein that is a homolog of EmrA. Together with the inner membrane transporter EmrY and the outer membrane channel TolC it mediates multidrug efflux.
-SRR9113479_assembly	12	2620	4158	emrY	1-1539/1539	===============	0/0	100.00	99.67	card	D78168:1592-3131	card~~~emrY~~~D78168:1592-3131 emrY is a multidrug transport that moves substrates across the inner membrane of the Gram-negative E. coli. It is a homolog of emrB.
-SRR9113479_assembly	12	107325	108293	pmrF	1-969/969	===============	0/0	100.00	99.28	card	U00096:2367071-2368040	card~~~pmrF~~~U00096:2367071-2368040 PmrF is required for the synthesis and transfer of 4-amino-4-deoxy-L-arabinose (Ara4N) to Lipid A which allows gram-negative bacteria to resist the antimicrobial activity of cationic antimicrobial peptides and antibiotics such as polymyxin. pmrF corresponds to 1 locus in Pseudomonas aeruginosa PAO1 and 1 locus in Pseudomonas aeruginosa LESB58.
-SRR9113479_assembly	14	22658	24124	mdtP	1-1467/1467	===============	0/0	100.00	98.16	card	AP009048.1:4303043-4304510	card~~~mdtP~~~AP009048.1:4303043-4304510 Multidrug resistance efflux pump. Could be involved in resistance to puromycin acriflavine and tetraphenylarsonium chloride
-SRR9113479_assembly	14	24121	26172	mdtO	1-2052/2052	===============	0/0	100.00	98.39	card	AP009048.1:4304506-4306558	card~~~mdtO~~~AP009048.1:4304506-4306558 Multidrug resistance efflux pump. Could be involved in resistance to puromycin acriflavine and tetraphenylarsonium chloride
-SRR9113479_assembly	14	26172	27203	mdtN	1-1032/1032	===============	0/0	100.00	98.06	card	AP009048.1:4306557-4307589	card~~~mdtN~~~AP009048.1:4306557-4307589 Multidrug resistance efflux pump. Could be involved in resistance to puromycin acriflavine and tetraphenylarsonium chloride.
-SRR9113479_assembly	14	56559	58202	eptA	1-1644/1644	===============	0/0	100.00	99.33	card	AP009048:4338625-4340269	card~~~eptA~~~AP009048:4338625-4340269 PmrC mediates the modification of Lipid A by the addition of 4-amino-4-deoxy-L-arabinose (L-Ara4N) and phosphoethanolamine resulting in a less negative cell membrane and decreased binding of polymyxin B.
-SRR9113479_assembly	14	100409	101542	Escherichia_coli_ampC	1-1134/1134	===============	0/0	100.00	97.35	card	NC_000913.3:4377811-4378945	card~~~Escherichia_coli_ampC~~~NC_000913.3:4377811-4378945 A class C ampC beta-lactamase (cephalosporinase) enzyme described in Escherichia coli shown clinically to confer resistance to penicillin-like and cephalosporin-class antibiotics.
-SRR9113479_assembly	16	18584	19810	mdtG	1-1227/1227	===============	0/0	100.00	99.27	card	CP000800.1:1191728-1192955	card~~~mdtG~~~CP000800.1:1191728-1192955 The MdtG protein also named YceE appears to be a member of the major facilitator superfamily of transporters and it has been reported when overexpressed to increase fosfomycin and deoxycholate resistances. mdtG is a member of the marA-soxS-rob regulon.
-SRR9113479_assembly	16	28441	29649	mdtH	1-1209/1209	===============	0/0	100.00	98.43	card	U00096:1124118-1125327	card~~~mdtH~~~U00096:1124118-1125327 Multidrug resistance protein MdtH
-SRR9113479_assembly	19	22628	24001	cpxA	1-1374/1374	===============	0/0	100.00	98.62	card	BA000007.3:4903689-4905063	card~~~cpxA~~~BA000007.3:4903689-4905063 CpxA is a membrane-localized sensor kinase that is activated by envelope stress. It starts a kinase cascade that activates CpxR which promotes efflux complex expression.
-SRR9113479_assembly	2	256854	258086	mdtM	1-1233/1233	===============	0/0	100.00	96.27	card	U00096.3:4567287-4568520	card~~~mdtM~~~U00096.3:4567287-4568520 Multidrug resistance protein MdtM
-SRR9113479_assembly	24	23717	24075	Klebsiella_pneumoniae_KpnE	5-363/363	========/======	2/2	98.62	75.56	card	AP006725.1:2483890-2484253	card~~~Klebsiella_pneumoniae_KpnE~~~AP006725.1:2483890-2484253 KpnE subunit of KpnEF resembles EbrAB from E. coli. Mutation in KpnEF resulted in increased susceptibility to cefepime ceftriaxon colistin erythromycin rifampin tetracycline and streptomycin as well as enhanced sensitivity toward sodium dodecyl sulfate deoxycholate dyes benzalkonium chloride chlorhexidine and triclosan
-SRR9113479_assembly	24	24067	24391	Klebsiella_pneumoniae_KpnF	6-330/330	===============	0/0	98.48	75.39	card	AP006725.1:2484239-2484569	card~~~Klebsiella_pneumoniae_KpnF~~~AP006725.1:2484239-2484569 KpnF subunit of KpnEF resembles EbrAB from E. coli. Mutation in KpnEF resulted in increased susceptibility to cefepime ceftriaxon colistin erythromycin rifampin tetracycline and streptomycin as well as enhanced sensitivity toward sodium dodecyl sulfate deoxycholate dyes benzalkonium chloride chlorhexidine and triclosan.
-SRR9113479_assembly	25	12898	14144	tet(A)	1-1247/1275	===============	0/0	97.80	100.00	card	AF534183.1:2971-4246	card~~~tet(A)~~~AF534183.1:2971-4246 TetA is a tetracycline efflux pump found in many species of Gram-negative bacteria.
-SRR9113479_assembly	25	17540	18189	OXA-368	2072-2721/2721	...........====	0/0	23.89	99.08	card	KT736121.1:1-2722	card~~~OXA-368~~~KT736121.1:1-2722 OXA-368 is a beta-lactamase found in Aeromonas sobria. From the Lahey list of OXA beta-lactamases.
-SRR9113479_assembly	25	18254	19084	OXA-1	1-831/831	===============	0/0	100.00	100.00	card	JN420336.1:1400-2231	card~~~OXA-1~~~JN420336.1:1400-2231 OXA-1 is a beta-lactamase found in E. coli
-SRR9113479_assembly	25	19222	19666	catB3	1-445/633	===========....	0/0	70.30	99.78	card	JX101693.1:58201-58834	card~~~catB3~~~JX101693.1:58201-58834 catB3 is a plasmid or chromosome-encoded variant of the cat gene found in Salmonella typhimurium Acinetobacter baumannii and Escherichia coli
-SRR9113479_assembly	25	21029	21904	CTX-M-15	1-876/876	===============	0/0	100.00	100.00	card	AY044436:1436-2312	card~~~CTX-M-15~~~AY044436:1436-2312 CTX-M-15 is a beta-lactamase found in the Enterobacteriaceae family
-SRR9113479_assembly	25	54292	54540	AAC(6')-Ib7	1-249/980	====...........	0/0	25.41	99.60	card	Y11946.1:1-981	card~~~AAC(6')-Ib7~~~Y11946.1:1-981 AAC(6')-Ib7 is a plasmid-encoded aminoglycoside acetyltransferase in E. cloacae and C. freundii
-SRR9113479_assembly	25	54554	55027	dfrA17	1-474/474	===============	0/0	100.00	100.00	card	DQ838665:1-475	card~~~dfrA17~~~DQ838665:1-475 dfrA17 is an integron-encoded dihydrofolate reductase found in Escherichia coli
-SRR9113479_assembly	25	55158	55946	aadA5	1-789/789	===============	0/0	100.00	100.00	card	AF137361:64-853	card~~~aadA5~~~AF137361:64-853 aadA5 is an aminoglycoside nucleotidyltransferase gene encoded by plasmids transposons and integrons in E. coli K. pneumoniae Kluyvera georgiana P. aeruginosa and E. cloacae
-SRR9113479_assembly	25	56493	57332	sul1	1-840/840	===============	0/0	100.00	100.00	card	JF969163:1054-1894	card~~~sul1~~~JF969163:1054-1894 Sul1 is a sulfonamide resistant dihydropteroate synthase of Gram-negative bacteria. It is linked to other resistance genes of class 1 integrons.
-SRR9113479_assembly	25	62643	63548	mphA	1-906/906	===============	0/0	100.00	100.00	card	D16251.1:1626-2532	card~~~mphA~~~D16251.1:1626-2532 The mphA gene encodes for resistance enzyme MPH(2')-I which preferentially inactivate 14-membered macrolides (e.g.erythromycin telithromycin roxithromycin) over 16-membered macrolides (e.g.tylosin spiramycin). It phosphorylates macrolides at 2'-OH hydroxyl of desosamine sugar of macrolides in a GTP-dependent manner.
-SRR9113479_assembly	29	29543	29926	marA	1-384/384	===============	0/0	100.00	99.48	card	AP009048.1:1621288-1621672	card~~~marA~~~AP009048.1:1621288-1621672 In the presence of antibiotic stress E. coli overexpresses the global activator protein MarA which besides inducing MDR efflux pump AcrAB also down- regulates synthesis of the porin OmpF.
-SRR9113479_assembly	3	120242	121990	msbA	1-1749/1749	===============	0/0	100.00	99.37	card	U00096.3:966621-968370	card~~~msbA~~~U00096.3:966621-968370 MsbA is a multidrug resistance transporter homolog from E. coli and belongs to a superfamily of transporters that contain an adenosine triphosphate (ATP) binding cassette (ABC) which is also called a nucleotide-binding domain (NBD). MsbA is a member of the MDR-ABC transporter group by sequence homology. MsbA transports lipid A a major component of the bacterial outer cell membrane and is the only bacterial ABC transporter that is essential for cell viability.
-SRR9113479_assembly	3	220771	222003	Escherichia_coli_mdfA	1-1233/1233	===============	0/0	100.00	97.24	card	JQ394987:1-1234	card~~~Escherichia_coli_mdfA~~~JQ394987:1-1234 Multidrug efflux pump in E. coli. This multidrug efflux system was originally identified as the Cmr/CmlA chloramphenicol exporter.
-SRR9113479_assembly	31	2026	3669	yojI	1-1644/1644	===============	0/0	100.00	98.11	card	U00096.3:2306972-2308616	card~~~yojI~~~U00096.3:2306972-2308616 YojI mediates resistance to the peptide antibiotic microcin J25 when it is expressed from a multicopy vector. YojI is capable of pumping out microcin molecules.  The outer membrane protein TolC in addition to YojI is required for export of microcin J25 out of the cell. Microcin J25 is thus the first known substrate for YojI.
-SRR9113479_assembly	32	178	990	NDM-7	1-813/813	===============	0/0	100.00	100.00	card	JX262694:1-814	card~~~NDM-7~~~JX262694:1-814 NDM-7 is a beta-lactamase found in Escherichia coli
-SRR9113479_assembly	32	994	1359	determinant_of_bleomycin_resistance	1-366/366	===============	0/0	100.00	100.00	card	KC503911.1:9884-10250	card~~~determinant_of_bleomycin_resistance~~~KC503911.1:9884-10250 A novel bleomycin resistance protein encoded by a metallo-beta-lactamase-associated ble gene. Expression of BRP(MBL) confers resistance to bleomycin and bleomycin-like antibiotics in Enterobacteriaceae and Acinetobacter where it is co-expressed with an MBL and controlled by the same promoter region.
-SRR9113479_assembly	40	31611	34287	evgS	918-3594/3594	...============	0/0	74.49	99.48	card	U00096:2484374-2487968	card~~~evgS~~~U00096:2484374-2487968 EvgS is a sensor protein that phosphorylates the regulatory protein EvgA. evgS corresponds to 1 locus in Pseudomonas aeruginosa PAO1 and 1 locus in Pseudomonas aeruginosa LESB58.
-SRR9113479_assembly	41	31495	31827	Escherichia_coli_emrE	1-333/333	===============	0/0	100.00	91.89	card	Z11877.1:486-819	card~~~Escherichia_coli_emrE~~~Z11877.1:486-819 Member of the small MDR (multidrug resistance) family of transporters; in Escherichia coli this protein provides resistance against a number of positively charged compounds including ethidium bromide and erythromycin; proton-dependent secondary transporter which exchanges protons for compound translocation
-SRR9113479_assembly	43	24684	25214	emrR	1-531/531	===============	0/0	100.00	100.00	card	U00096.3:2810770-2811301	card~~~emrR~~~U00096.3:2810770-2811301 EmrR is a negative regulator for the EmrAB-TolC multidrug efflux pump in E. coli. Mutations lead to EmrAB-TolC overexpression.
-SRR9113479_assembly	43	25341	26513	emrA	1-1173/1173	===============	0/0	100.00	97.53	card	AP009048:2810083-2811256	card~~~emrA~~~AP009048:2810083-2811256 EmrA is a membrane fusion protein providing an efflux pathway with EmrB and TolC between the inner and outer membranes of E. coli a Gram-negative bacterium.
-SRR9113479_assembly	43	26530	28068	emrB	1-1539/1539	===============	0/0	100.00	99.87	card	U00096:2812616-2814155	card~~~emrB~~~U00096:2812616-2814155 emrB is a translocase in the emrB -TolC efflux protein in E. coli. It recognizes substrates including carbonyl cyanide m-chlorophenylhydrazone (CCCP) nalidixic acid and thioloactomycin.
-SRR9113479_assembly	44	22397	23069	kdpE	1-673/678	===============	0/0	99.26	97.18	card	U00096.3:721056-721734	card~~~kdpE~~~U00096.3:721056-721734 kdpE is a transcriptional activator that is part of the two-component system KdpD/KdpE that is studied for its regulatory role in potassium transport and has been identified as an adaptive regulator involved in the virulence and intracellular survival of pathogenic bacteria. kdpE regulates a range of virulence loci through direct promoter binding.
-SRR9113479_assembly	45	7769	8121	Nocardia_rifampin_resistant_beta-subunit_of_RNA_polymerase_(rpoB2)	2980-3332/3489	......../...===	4/6	10.03	75.56	card	AP006618.1:4835200-4838689	card~~~Nocardia_rifampin_resistant_beta-subunit_of_RNA_polymerase_(rpoB2)~~~AP006618.1:4835200-4838689 Due to gene duplication the genomes of Nocardia species include both rifampin-sensitive beta-subunit of RNA polymerase (rpoB) and rifampin-resistant beta-subunit of RNA polymerase (rpoB2) genes with ~88% similarity between the two gene products. Expression of the rpoB2 variant results in replacement of rifampin sensitivity with rifampin resistance.
-SRR9113479_assembly	47	23039	24196	mdtE	1-1158/1158	===============	0/0	100.00	99.74	card	AP009048.1:3980026-3981184	card~~~mdtE~~~AP009048.1:3980026-3981184 MdtE is the membrane fusion protein of the MdtEF multidrug efflux complex. It shares 70% sequence similarity with AcrA.
-SRR9113479_assembly	47	24221	27334	mdtF	1-3114/3114	===============	0/0	100.00	98.49	card	U00096:3660414-3663528	card~~~mdtF~~~U00096:3660414-3663528 MdtF is the multidrug inner membrane transporter for the MdtEF-TolC efflux complex.
-SRR9113479_assembly	47	27697	28425	gadW	1-729/729	===============	0/0	100.00	94.38	card	CP015085.1:2551712-2552441	card~~~gadW~~~CP015085.1:2551712-2552441 GadW is an AraC-family regulator that promotes mdtEF expression to confer multidrug resistance. GadW inhibits GadX-dependent activation. GadW clearly represses gadX and in situations where GadX is missing activates gadA and gadBC.
-SRR9113479_assembly	47	28793	29617	gadX	1-825/825	===============	0/0	100.00	97.94	card	AP009048.1:3974605-3975430	card~~~gadX~~~AP009048.1:3974605-3975430 GadX is an AraC-family regulator that promotes mdtEF expression to confer multidrug resistance.
-SRR9113479_assembly	5	4886	5299	H-NS	1-414/414	===============	0/0	100.00	99.52	card	BA000007.3:1737691-1738105	card~~~H-NS~~~BA000007.3:1737691-1738105 H-NS is a histone-like protein involved in global gene regulation in Gram-negative bacteria. It is a repressor of the membrane fusion protein genes acrE mdtE and emrK as well as nearby genes of many RND-type multidrug exporters.
-SRR9113479_assembly	50	24357	25523	ugd	1-1167/1167	========/======	6/12	99.49	84.57	card	U00096:2098447-2099614	card~~~ugd~~~U00096:2098447-2099614 PmrE is required for the synthesis and transfer of 4-amino-4-deoxy-L-arabinose (Ara4N) to Lipid A which allows gram-negative bacteria to resist the antimicrobial activity of cationic antimicrobial peptides and antibiotics such as polymyxin
-SRR9113479_assembly	6	113576	114050	mphB	3-477/477	===============	0/0	99.58	97.47	card	AE005174.2:3397371-3397848	card~~~mphB~~~AE005174.2:3397371-3397848 The mphB gene encodes for MPH(2')-II. This enzymes phosphorylates 14-membered and 16-membered macrolides.  It phosphorylates macrolides in GTP- dependent manner at 2'-OH hydroxyl of desosamine sugar of macrolides.
-SRR9113479_assembly	6	135287	138400	acrD	1-3114/3114	===============	0/0	100.00	99.07	card	AP009048.1:2586251-2589365	card~~~acrD~~~AP009048.1:2586251-2589365 AcrD is an aminoglycoside efflux pump expressed in E. coli. Its expression can be induced by indole and is regulated by baeRS and cpxAR.
-SRR9113479_assembly	6	169518	170822	Escherichia_coli_ampC1_beta-lactamase	1-1305/1305	===============	0/0	100.00	98.62	card	FN649414.1:2765051-2766356	card~~~Escherichia_coli_ampC1_beta-lactamase~~~FN649414.1:2765051-2766356 An ampC-like beta-lactamase identified from Escherichia coli.
-SRR9113479_assembly	7	42780	44027	mdtA	1-1248/1248	===============	0/0	100.00	98.16	card	U00096:2154016-2155264	card~~~mdtA~~~U00096:2154016-2155264 MdtA is the membrane fusion protein of the multidrug efflux complex mdtABC.
-SRR9113479_assembly	7	44027	47149	mdtB	1-3123/3123	===============	0/0	100.00	99.78	card	U00096:2155263-2158386	card~~~mdtB~~~U00096:2155263-2158386 MdtB is a transporter that forms a heteromultimer complex with MdtC to form a multidrug transporter. MdtBC is part of the MdtABC-TolC efflux complex.
-SRR9113479_assembly	7	47150	50227	mdtC	1-3078/3078	===============	0/0	100.00	98.57	card	U00096:2158386-2161464	card~~~mdtC~~~U00096:2158386-2161464 MdtC is a transporter that forms a heteromultimer complex with MdtB to form a multidrug transporter. MdtBC is part of the MdtABC-TolC efflux complex. In the absence of MdtB MdtC can form a homomultimer complex that results in a functioning efflux complex with a narrower drug specificity. mdtC corresponds to 3 loci in Pseudomonas aeruginosa PAO1 (gene name: muxC/muxB) and 3 loci in Pseudomonas aeruginosa LESB58.
-SRR9113479_assembly	7	51640	53043	baeS	1-1404/1404	===============	0/0	100.00	98.08	card	AP009048:2165013-2166417	card~~~baeS~~~AP009048:2165013-2166417 BaeS is a sensor kinase in the BaeSR regulatory system. While it phosphorylates BaeR to increase its activity BaeS is not necessary for overexpressed BaeR to confer resistance.
-SRR9113479_assembly	7	53040	53762	baeR	1-723/723	===============	0/0	100.00	99.45	card	AP009048.1:2166413-2167136	card~~~baeR~~~AP009048.1:2166413-2167136 BaeR is a response regulator that promotes the expression of MdtABC and AcrD efflux complexes.
-SRR9113479_assembly	8	79721	80914	Escherichia_coli_acrA	1-1194/1194	===============	0/0	100.00	99.75	card	U00096.3:484426-485620	card~~~Escherichia_coli_acrA~~~U00096.3:484426-485620 AcrA is a subunit of the AcrAB-TolC multidrug efflux system that in E. coli.
-SRR9113479_assembly	8	80937	84086	acrB	1-3150/3150	===============	0/0	100.00	99.08	card	U00096.3:481254-484404	card~~~acrB~~~U00096.3:481254-484404 Protein subunit of AcrA-AcrB-TolC multidrug efflux complex. AcrB functions as a herterotrimer which forms the inner membrane component and is primarily responsible for substrate recognition and energy transduction by acting as a drug/proton antiporter.
-SRR9113479_assembly	8	168988	170145	Escherichia_coli_ampH	1-1158/1158	===============	0/0	100.00	99.40	card	AP012030.1:395554-396712	card~~~Escherichia_coli_ampH~~~AP012030.1:395554-396712 AmpH is a class C ampC-like beta-lactamase and penicillin-binding protein identified in Escherichia coli.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/abricate_report_screening_files.loc	Thu Jan 02 21:04:04 2020 +0000
@@ -0,0 +1,6 @@
+# Tab separated with three columns:
+# - value (Galaxy records this in the Galaxy DB)
+# - name (Galaxy shows this in the UI)
+# - path (path to the screening file: a TSV with gene_name and regex columns)
+#
+test_entry	"Test Screening File"	${__HERE__}/screening_file.tsv
\ No newline at end of file
--- a/test-data/gene_detection_status.tsv	Fri Sep 27 13:34:45 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-gene_name	detected
-KPC	False
-NDM	True
-OXA	True
--- a/test-data/screen.tsv	Fri Sep 27 13:34:45 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-gene_name	regex
-KPC	KPC
-NDM	NDM
-OXA	OXA
--- a/test-data/screen.yaml	Fri Sep 27 13:34:45 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
----
-- gene: KPC
-  regex: KPC
-- gene: OXA
-  regex: OXA
-- gene: NDM
-  regex: NDM
--- a/test-data/screened_report.tsv	Fri Sep 27 13:34:45 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-#FILE	%IDENTITY	END	SEQUENCE	COVERAGE_MAP	%COVERAGE	DATABASE	ACCESSION	START	PRODUCT	GAPS	COVERAGE	GENE
-SRR9113479_assembly	100.00	990	32	===============	100.00	card	JX262694:1-814	178	card~~~NDM-7~~~JX262694:1-814 NDM-7 is a beta-lactamase found in Escherichia coli	0/0	1-813/813	NDM-7
-SRR9113479_assembly	99.08	18189	25	...........====	23.89	card	KT736121.1:1-2722	17540	card~~~OXA-368~~~KT736121.1:1-2722 OXA-368 is a beta-lactamase found in Aeromonas sobria. From the Lahey list of OXA beta-lactamases.	0/0	2072-2721/2721	OXA-368
-SRR9113479_assembly	100.00	19084	25	===============	100.00	card	JN420336.1:1400-2231	18254	card~~~OXA-1~~~JN420336.1:1400-2231 OXA-1 is a beta-lactamase found in E. coli	0/0	1-831/831	OXA-1
--- a/test-data/screening_file.tsv	Fri Sep 27 13:34:45 2019 -0400
+++ b/test-data/screening_file.tsv	Thu Jan 02 21:04:04 2020 +0000
@@ -1,4 +1,5 @@
 gene_name	regex
-KPC	KPC
-NDM	NDM
-OXA	OXA
+KPC	^KPC-\d+$
+NDM	^NDM-\d+$
+OXA	^OXA-\d+$
+rpoB2	rpoB2
--- a/tool_data_table_conf.xml.test	Fri Sep 27 13:34:45 2019 -0400
+++ b/tool_data_table_conf.xml.test	Thu Jan 02 21:04:04 2020 +0000
@@ -1,8 +1,8 @@
 <?xml version="1.0"?>
 <tables>
-    <!-- Locations of Kraken2 database in the required format -->
-    <table name="kraken2_databases" comment_char="#">
+    <!-- Locations of abricate report screening files in the required format -->
+    <table name="abricate_report_screening_files" comment_char="#">
         <columns>value, name, path</columns>
-        <file path="${__HERE__}/test-data/test_database.loc" />
+        <file path="${__HERE__}/test-data/abricate_report_screening_files.loc" />
     </table>
 </tables>