comparison pick_plasmids_containing_genes.py @ 2:2dd1a0ed7cce draft

"planemo upload for repository https://github.com/dfornika/galaxy/tree/master/tools/pick_plasmids_containing_genes commit 9cb01222a76ffdf89fa8d7b69f32df7c2b1f860a-dirty"
author dfornika
date Sat, 02 Nov 2019 01:32:22 -0400
parents a938371b3bfd
children 109b9d1e2e99
comparison
Legend: equal | deleted | inserted | replaced
1:ffa984b55de7 2:2dd1a0ed7cce
18 reader = csv.DictReader(f, delimiter="\t", quotechar='"') 18 reader = csv.DictReader(f, delimiter="\t", quotechar='"')
19 for row in reader: 19 for row in reader:
20 screen.append(row) 20 screen.append(row)
21 return screen 21 return screen
22 22
23 def get_fieldnames(input_file):
24 with open(input_file) as f:
25 reader = csv.DictReader(f, delimiter="\t", quotechar='"')
26 fieldnames = reader.fieldnames
27 return fieldnames
28
29 def main(args): 23 def main(args):
30 # create output directory 24 # create output directory
31 try: 25 try:
32 os.mkdir(args.outdir) 26 os.mkdir(args.outdir)
33 except OSError as exc: 27 except OSError as exc:
39 # parse screening file 33 # parse screening file
40 screen = parse_screen_file(args.abricate_report_screening_file) 34 screen = parse_screen_file(args.abricate_report_screening_file)
41 contigs_with_genes_of_interest = [] 35 contigs_with_genes_of_interest = []
42 # parse all abricate reports and determine which ones contain genes of interest 36 # parse all abricate reports and determine which ones contain genes of interest
43 print("\t".join(["file", "gene_detected"])) 37 print("\t".join(["file", "gene_detected"]))
44 for abricate_report in args.abricate_reports: 38
45 gene_of_interest_detected = False 39 with open(args.concatenated_abricate_reports, 'r') as f:
46 with open(abricate_report, 'r') as f: 40 abricate_report_reader = csv.DictReader(f, delimiter="\t", quotechar='"')
47 abricate_report_reader = csv.DictReader(f, delimiter="\t", quotechar='"') 41 for gene in screen:
48 for gene in screen: 42 for abricate_report_row in abricate_report_reader:
49 for abricate_report_row in abricate_report_reader: 43 if abricate_report_row['#FILE'] == '#FILE':
50 if re.search(gene['regex'], abricate_report_row['GENE']): 44 continue
51 gene_of_interest_detected = True 45 if re.search(gene['regex'], abricate_report_row['GENE']):
52 contigs_with_genes_of_interest.append(abricate_report_row['SEQUENCE']) 46 contigs_with_genes_of_interest.append(abricate_report_row['SEQUENCE'])
53 f.seek(0) 47 f.seek(0)
54 next(abricate_report_reader) 48 next(abricate_report_reader)
55 print("\t".join([abricate_report, str(gene_of_interest_detected)]))
56 49
57 # copy the corresponding plasmid fasta files into outdir 50 # copy the corresponding plasmid fasta files into outdir
58 for contig in contigs_with_genes_of_interest: 51 for contig in contigs_with_genes_of_interest:
59 for plasmid in args.plasmids: 52 for plasmid in args.plasmids:
60 copy_plasmid = False 53 copy_plasmid = False
67 shutil.copy2(plasmid, args.outdir) 60 shutil.copy2(plasmid, args.outdir)
68 61
69 if __name__ == '__main__': 62 if __name__ == '__main__':
70 parser = argparse.ArgumentParser() 63 parser = argparse.ArgumentParser()
71 parser.add_argument("--plasmids", nargs='+', help="plasmid assemblies (fasta)") 64 parser.add_argument("--plasmids", nargs='+', help="plasmid assemblies (fasta)")
72 parser.add_argument("--abricate_reports", nargs='+', help="abricate reports (tsv)") 65 parser.add_argument("--concatenated_abricate_reports", help="abricate reports (tsv)")
73 parser.add_argument("--abricate_report_screening_file", help="") 66 parser.add_argument("--abricate_report_screening_file", help="")
74 parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory") 67 parser.add_argument("--outdir", dest="outdir", default=".", help="Output directory")
75 args = parser.parse_args() 68 args = parser.parse_args()
76 main(args) 69 main(args)