diff screen_abricate_report.py @ 1:40003338a8e8 draft

planemo upload for repository https://github.com/dfornika/galaxytools/blob/master/tools/screen_abricate_report commit d9732cd3279d03dcc498bf2eb903f9e6120a9d85-dirty
author dfornika
date Fri, 27 Sep 2019 13:31:43 -0400
parents 2ffd23634c1e
children 1c1c680c70a0
line wrap: on
line diff
--- a/screen_abricate_report.py	Thu Sep 26 19:37:27 2019 -0400
+++ b/screen_abricate_report.py	Fri Sep 27 13:31:43 2019 -0400
@@ -17,30 +17,42 @@
             screen.append(row)
     return screen
 
-def get_abricate_report_fieldnames(abricate_report):
-    with open(abricate_report) as f:
+def get_fieldnames(input_file):
+    with open(input_file) as f:
         reader = csv.DictReader(f, delimiter="\t", quotechar='"')
         row = next(reader)
     fieldnames = row.keys()
     return fieldnames
     
 def main(args):
-    screen = parse_screen_file(args.screen)
-    abricate_report_fieldnames = get_abricate_report_fieldnames(args.abricate_report)
-    with open(args.abricate_report) as f:
-        reader = csv.DictReader(f, delimiter="\t", quotechar='"')
-        writer = csv.DictWriter(sys.stdout, delimiter="\t", quotechar='"', fieldnames=abricate_report_fieldnames)
-        writer.writeheader()
-        for row in reader:
-            for gene in screen:
-                if re.search(gene['regex'], row['GENE']):
-                    writer.writerow(row)
-        
+    screen = parse_screen_file(args.screening_file)
+    abricate_report_fieldnames = get_fieldnames(args.abricate_report)
+    gene_detection_status_fieldnames = ['gene_name', 'detected']
+    with open(args.abricate_report, 'r') as f1, open(args.screened_report, 'w') as f2, open(args.gene_detection_status, 'w') as f3:
+        abricate_report_reader = csv.DictReader(f1, delimiter="\t", quotechar='"')
+        screened_report_writer = csv.DictWriter(f2, delimiter="\t", quotechar='"', fieldnames=abricate_report_fieldnames)
+        gene_detection_status_writer = csv.DictWriter(f3, delimiter="\t", quotechar='"', fieldnames=gene_detection_status_fieldnames)
+        screened_report_writer.writeheader()
+        gene_detection_status_writer.writeheader()
+
+        for gene in screen:
+            gene_detection_status = {
+                'gene_name': gene['gene_name'],
+                'detected': False
+            }
+            for abricate_report_row in abricate_report_reader:
+                if re.search(gene['regex'], abricate_report_row['GENE']):
+                    gene_detection_status['detected'] = True
+                    screened_report_writer.writerow(abricate_report_row)
+            gene_detection_status_writer.writerow(gene_detection_status)
+            f1.seek(0) # return file pointer to start of abricate report
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument("--screen", help="TSV file defining genes to screen for")
-    parser.add_argument("abricate_report", help="Abricate output")
+    parser.add_argument("abricate_report", help="Input: Abricate report to screen (tsv)")
+    parser.add_argument("--screening_file", help="Input: List of genes to screen for (tsv)")
+    parser.add_argument("--screened_report", help="Output: Screened abricate report including only genes of interest (tsv)")
+    parser.add_argument("--gene_detection_status", help="Output: detection status for all genes listed in the screening file (tsv)")
     args = parser.parse_args()
     main(args)