diff pick_plasmids_containing_genes.py @ 2:2dd1a0ed7cce draft

"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit 9cb01222a76ffdf89fa8d7b69f32df7c2b1f860a-dirty"
author dfornika
date Sat, 02 Nov 2019 01:32:22 -0400
parents a938371b3bfd
children 109b9d1e2e99
line wrap: on
line diff
--- a/pick_plasmids_containing_genes.py	Sat Nov 02 00:34:49 2019 -0400
+++ b/pick_plasmids_containing_genes.py	Sat Nov 02 01:32:22 2019 -0400
@@ -20,12 +20,6 @@
             screen.append(row)
     return screen
 
-def get_fieldnames(input_file):
-    with open(input_file) as f:
-        reader = csv.DictReader(f, delimiter="\t", quotechar='"')
-        fieldnames = reader.fieldnames
-    return fieldnames
-
 def main(args):
     # create output directory
     try:
@@ -41,18 +35,17 @@
     contigs_with_genes_of_interest = []
     # parse all abricate reports and determine which ones contain genes of interest
     print("\t".join(["file", "gene_detected"]))
-    for abricate_report in args.abricate_reports:
-        gene_of_interest_detected = False
-        with open(abricate_report, 'r') as f:
-            abricate_report_reader = csv.DictReader(f, delimiter="\t", quotechar='"')
-            for gene in screen:
-                for abricate_report_row in abricate_report_reader:
-                    if re.search(gene['regex'], abricate_report_row['GENE']):
-                        gene_of_interest_detected = True
-                        contigs_with_genes_of_interest.append(abricate_report_row['SEQUENCE'])
-                f.seek(0)
-                next(abricate_report_reader)
-        print("\t".join([abricate_report, str(gene_of_interest_detected)]))
+
+    with open(args.concatenated_abricate_reports, 'r') as f:
+        abricate_report_reader = csv.DictReader(f, delimiter="\t", quotechar='"')
+        for gene in screen:
+            for abricate_report_row in abricate_report_reader:
+                if abricate_report_row['#FILE'] == '#FILE':
+                    continue
+                if re.search(gene['regex'], abricate_report_row['GENE']):
+                    contigs_with_genes_of_interest.append(abricate_report_row['SEQUENCE'])
+            f.seek(0)
+            next(abricate_report_reader)
 
     # copy the corresponding plasmid fasta files into outdir        
     for contig in contigs_with_genes_of_interest:
@@ -69,7 +62,7 @@
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument("--plasmids", nargs='+', help="plasmid assemblies (fasta)")
-    parser.add_argument("--abricate_reports", nargs='+', help="abricate reports (tsv)")
+    parser.add_argument("--concatenated_abricate_reports", help="abricate reports (tsv)")
     parser.add_argument("--abricate_report_screening_file", help="")
     parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
     args = parser.parse_args()