comparison screen_abricate_report.py @ 1:40003338a8e8 draft

planemo upload for repository https://github.com/dfornika/galaxytools/blob/master/tools/screen_abricate_report commit d9732cd3279d03dcc498bf2eb903f9e6120a9d85-dirty
author dfornika
date Fri, 27 Sep 2019 13:31:43 -0400
parents 2ffd23634c1e
children 1c1c680c70a0
comparison
equal deleted inserted replaced
0:2ffd23634c1e 1:40003338a8e8
15 reader = csv.DictReader(f, delimiter="\t", quotechar='"') 15 reader = csv.DictReader(f, delimiter="\t", quotechar='"')
16 for row in reader: 16 for row in reader:
17 screen.append(row) 17 screen.append(row)
18 return screen 18 return screen
19 19
20 def get_abricate_report_fieldnames(abricate_report): 20 def get_fieldnames(input_file):
21 with open(abricate_report) as f: 21 with open(input_file) as f:
22 reader = csv.DictReader(f, delimiter="\t", quotechar='"') 22 reader = csv.DictReader(f, delimiter="\t", quotechar='"')
23 row = next(reader) 23 row = next(reader)
24 fieldnames = row.keys() 24 fieldnames = row.keys()
25 return fieldnames 25 return fieldnames
26 26
27 def main(args): 27 def main(args):
28 screen = parse_screen_file(args.screen) 28 screen = parse_screen_file(args.screening_file)
29 abricate_report_fieldnames = get_abricate_report_fieldnames(args.abricate_report) 29 abricate_report_fieldnames = get_fieldnames(args.abricate_report)
30 with open(args.abricate_report) as f: 30 gene_detection_status_fieldnames = ['gene_name', 'detected']
31 reader = csv.DictReader(f, delimiter="\t", quotechar='"') 31 with open(args.abricate_report, 'r') as f1, open(args.screened_report, 'w') as f2, open(args.gene_detection_status, 'w') as f3:
32 writer = csv.DictWriter(sys.stdout, delimiter="\t", quotechar='"', fieldnames=abricate_report_fieldnames) 32 abricate_report_reader = csv.DictReader(f1, delimiter="\t", quotechar='"')
33 writer.writeheader() 33 screened_report_writer = csv.DictWriter(f2, delimiter="\t", quotechar='"', fieldnames=abricate_report_fieldnames)
34 for row in reader: 34 gene_detection_status_writer = csv.DictWriter(f3, delimiter="\t", quotechar='"', fieldnames=gene_detection_status_fieldnames)
35 for gene in screen: 35 screened_report_writer.writeheader()
36 if re.search(gene['regex'], row['GENE']): 36 gene_detection_status_writer.writeheader()
37 writer.writerow(row) 37
38 38 for gene in screen:
39 gene_detection_status = {
40 'gene_name': gene['gene_name'],
41 'detected': False
42 }
43 for abricate_report_row in abricate_report_reader:
44 if re.search(gene['regex'], abricate_report_row['GENE']):
45 gene_detection_status['detected'] = True
46 screened_report_writer.writerow(abricate_report_row)
47 gene_detection_status_writer.writerow(gene_detection_status)
48 f1.seek(0) # return file pointer to start of abricate report
39 49
40 50
41 if __name__ == '__main__': 51 if __name__ == '__main__':
42 parser = argparse.ArgumentParser() 52 parser = argparse.ArgumentParser()
43 parser.add_argument("--screen", help="TSV file defining genes to screen for") 53 parser.add_argument("abricate_report", help="Input: Abricate report to screen (tsv)")
44 parser.add_argument("abricate_report", help="Abricate output") 54 parser.add_argument("--screening_file", help="Input: List of genes to screen for (tsv)")
55 parser.add_argument("--screened_report", help="Output: Screened abricate report including only genes of interest (tsv)")
56 parser.add_argument("--gene_detection_status", help="Output: detection status for all genes listed in the screening file (tsv)")
45 args = parser.parse_args() 57 args = parser.parse_args()
46 main(args) 58 main(args)