changeset 2:2dd1a0ed7cce draft

"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit 9cb01222a76ffdf89fa8d7b69f32df7c2b1f860a-dirty"
author dfornika
date Sat, 02 Nov 2019 01:32:22 -0400
parents ffa984b55de7
children 50640b06fca5
files pick_plasmids_containing_genes.py pick_plasmids_containing_genes.xml test-data/concatenated_abricate_reports.tsv
diffstat 3 files changed, 34 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/pick_plasmids_containing_genes.py	Sat Nov 02 00:34:49 2019 -0400
+++ b/pick_plasmids_containing_genes.py	Sat Nov 02 01:32:22 2019 -0400
@@ -20,12 +20,6 @@
             screen.append(row)
     return screen
 
-def get_fieldnames(input_file):
-    with open(input_file) as f:
-        reader = csv.DictReader(f, delimiter="\t", quotechar='"')
-        fieldnames = reader.fieldnames
-    return fieldnames
-
 def main(args):
     # create output directory
     try:
@@ -41,18 +35,17 @@
     contigs_with_genes_of_interest = []
     # parse all abricate reports and determine which ones contain genes of interest
     print("\t".join(["file", "gene_detected"]))
-    for abricate_report in args.abricate_reports:
-        gene_of_interest_detected = False
-        with open(abricate_report, 'r') as f:
-            abricate_report_reader = csv.DictReader(f, delimiter="\t", quotechar='"')
-            for gene in screen:
-                for abricate_report_row in abricate_report_reader:
-                    if re.search(gene['regex'], abricate_report_row['GENE']):
-                        gene_of_interest_detected = True
-                        contigs_with_genes_of_interest.append(abricate_report_row['SEQUENCE'])
-                f.seek(0)
-                next(abricate_report_reader)
-        print("\t".join([abricate_report, str(gene_of_interest_detected)]))
+
+    with open(args.concatenated_abricate_reports, 'r') as f:
+        abricate_report_reader = csv.DictReader(f, delimiter="\t", quotechar='"')
+        for gene in screen:
+            for abricate_report_row in abricate_report_reader:
+                if abricate_report_row['#FILE'] == '#FILE':
+                    continue
+                if re.search(gene['regex'], abricate_report_row['GENE']):
+                    contigs_with_genes_of_interest.append(abricate_report_row['SEQUENCE'])
+            f.seek(0)
+            next(abricate_report_reader)
 
     # copy the corresponding plasmid fasta files into outdir        
     for contig in contigs_with_genes_of_interest:
@@ -69,7 +62,7 @@
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument("--plasmids", nargs='+', help="plasmid assemblies (fasta)")
-    parser.add_argument("--abricate_reports", nargs='+', help="abricate reports (tsv)")
+    parser.add_argument("--concatenated_abricate_reports", help="abricate reports (tsv)")
     parser.add_argument("--abricate_report_screening_file", help="")
     parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
     args = parser.parse_args()
--- a/pick_plasmids_containing_genes.xml	Sat Nov 02 00:34:49 2019 -0400
+++ b/pick_plasmids_containing_genes.xml	Sat Nov 02 01:32:22 2019 -0400
@@ -4,20 +4,17 @@
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
         '$__tool_directory__/pick_plasmids_containing_genes.py'
-        --plasmids 
+        --plasmids
         #for $plasmid in $plasmids:
           '${plasmid}'
         #end for
-        --abricate_reports
-        #for $abricate_report in $abricate_reports:
-          '${abricate_report}'
-        #end for
+        --concatenated_abricate_reports '${concatenated_abricate_reports}'
         --abricate_report_screening_file '${screening_file.fields.path}'
         --outdir outdir
     ]]></command>
     <inputs>
         <param name="plasmids" type="data_collection" collection_type="list" format="fasta" />
-        <param name="abricate_reports" type="data_collection" collection_type="list" format="tabular" />
+        <param name="concatenated_abricate_reports" type="data_collection" collection_type="list" format="tabular,txt" />
         <param name="screening_file" type="select" format="tabular">
 	    <options from_data_table="abricate_report_screening_files">
 	        <validator type="no_options" message="No abricate report screening files are available" />
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/concatenated_abricate_reports.tsv	Sat Nov 02 01:32:22 2019 -0400
@@ -0,0 +1,19 @@
+#FILE	SEQUENCE	START	END	STRAND	GENE	COVERAGE	COVERAGE_MAP	GAPS	%COVERAGE	%IDENTITY	DATABASE	ACCESSION	PRODUCT	RESISTANCE
+#FILE	SEQUENCE	START	END	STRAND	GENE	COVERAGE	COVERAGE_MAP	GAPS	%COVERAGE	%IDENTITY	DATABASE	ACCESSION	PRODUCT	RESISTANCE
+plasmid_1550	SRR9113487.fasta|65_length=4058_depth=2.47x	2638	3513	-	CTX-M-15	1-876/876	===============	0/0	100.00	100.00	card	AY044436:1436-2312	CTX-M-15 is a beta-lactamase found in the Enterobacteriaceae family	cephalosporin
+plasmid_1550	SRR9113487.fasta|77_length=2186_depth=3.14x	1	442	-	catB3	1-442/633	===========....	0/0	69.83	100.00	card	JX101693.1:58201-58834	catB3 is a plasmid or chromosome-encoded variant of the cat gene found in Salmonella typhimurium Acinetobacter baumannii and Escherichia coli	phenicol
+plasmid_1550	SRR9113487.fasta|77_length=2186_depth=3.14x	580	1410	-	OXA-1	1-831/831	===============	0/0	100.00	100.00	card	JN420336.1:1400-2231	OXA-1 is a beta-lactamase found in E. coli	penam/cephalosporin
+plasmid_1550	SRR9113487.fasta|77_length=2186_depth=3.14x	1475	2124	-	OXA-368	2072-2721/2721	...........====	0/0	23.89	99.08	card	KT736121.1:1-2722	OXA-368 is a beta-lactamase found in Aeromonas sobria. From the Lahey list of OXA beta-lactamases.	cephalosporin/penam
+#FILE	SEQUENCE	START	END	STRAND	GENE	COVERAGE	COVERAGE_MAP	GAPS	%COVERAGE	%IDENTITY	DATABASE	ACCESSION	PRODUCT	RESISTANCE
+plasmid_2719	SRR9113487.fasta|53_length=9674_depth=5.42x	496	1308	+	NDM-5	1-813/813	===============	0/0	100.00	100.00	card	JN104597:115-928	New Delhi beta-lactamase NDM-5.	cephamycin/penam/cephalosporin/carbapenem
+plasmid_2719	SRR9113487.fasta|53_length=9674_depth=5.42x	1312	1677	+	determinant_of_bleomycin_resistance	1-366/366	===============	0/0	100.00	100.00	card	KC503911.1:9884-10250	A novel bleomycin resistance protein encoded by a metallo-beta-lactamase-associated ble gene. Expression of BRP(MBL) confers resistance to bleomycin and bleomycin-like antibiotics in Enterobacteriaceae and Acinetobacter where it is co-expressed with an MBL and controlled by the same promoter region.	glycopeptide
+plasmid_2719	SRR9113487.fasta|53_length=9674_depth=5.42x	5621	6460	-	sul1	1-840/840	===============	0/0	100.00	100.00	card	JF969163:1054-1894	Sul1 is a sulfonamide resistant dihydropteroate synthase of Gram-negative bacteria. It is linked to other resistance genes of class 1 integrons.	sulfonamide
+plasmid_2719	SRR9113487.fasta|53_length=9674_depth=5.42x	6965	7744	-	aadA2	1-780/780	===============	0/0	100.00	99.87	card	AF156486:5013-5793	aadA2 is an aminoglycoside nucleotidyltransferase gene encoded by plasmids and integrons in K. pneumoniae Salmonella spp. Corynebacterium glutamicum C. freundii and Aeromonas spp.	aminoglycoside
+plasmid_2719	SRR9113487.fasta|53_length=9674_depth=5.42x	8164	8661	-	dfrA12	1-498/498	===============	0/0	100.00	100.00	card	GU585907.1:21606-22104	dfrA12 is an integron-encoded dihydrofolate reductase found in Vibrio cholerae	diaminopyrimidine
+plasmid_2719	SRR9113487.fasta|53_length=9674_depth=5.42x	8662	8910	-	AAC(6')-Ib7	1-249/980	====...........	0/0	25.41	98.80	card	Y11946.1:1-981	AAC(6')-Ib7 is a plasmid-encoded aminoglycoside acetyltransferase in E. cloacae and C. freundii	aminoglycoside
+#FILE	SEQUENCE	START	END	STRAND	GENE	COVERAGE	COVERAGE_MAP	GAPS	%COVERAGE	%IDENTITY	DATABASE	ACCESSION	PRODUCT	RESISTANCE
+plasmid_739	SRR9113487.fasta|58_length=8070_depth=5.28x	70	972	+	mphA	1-903/906	===============	0/0	99.67	100.00	card	D16251.1:1626-2532	The mphA gene encodes for resistance enzyme MPH(2')-I which preferentially inactivate 14-membered macrolides (e.g.erythromycin telithromycin roxithromycin) over 16-membered macrolides (e.g.tylosin spiramycin). It phosphorylates macrolides at 2'-OH hydroxyl of desosamine sugar of macrolides in a GTP-dependent manner.	macrolide
+plasmid_739	SRR9113487.fasta|58_length=8070_depth=5.28x	5102	5962	+	TEM-1	1-861/861	===============	0/0	100.00	99.88	card	AL513383:161911-162772	TEM-1 is a broad-spectrum beta-lactamase found in many Gram-negative bacteria. Confers resistance to penicillins and first generation cephalosphorins.	penam/cephalosporin/penem/monobactam
+plasmid_739	SRR9113487.fasta|58_length=8070_depth=5.28x	6132	6887	+	rmtB	1-756/756	===============	0/0	100.00	100.00	card	AM886293.1:116480-117236	RmtB is a 16S rRNA methyltransferase that targets mature or nearly mature 30S subunits. It transfers a methyl group from S-adenosyl-L-methionine to N7-G1405 of the 16S rRNA an aminoglycoside binding site.	aminoglycoside
+#FILE	SEQUENCE	START	END	STRAND	GENE	COVERAGE	COVERAGE_MAP	GAPS	%COVERAGE	%IDENTITY	DATABASE	ACCESSION	PRODUCT	RESISTANCE
+#FILE	SEQUENCE	START	END	STRAND	GENE	COVERAGE	COVERAGE_MAP	GAPS	%COVERAGE	%IDENTITY	DATABASE	ACCESSION	PRODUCT	RESISTANCE