Mercurial > repos > tduigou > cloning_simulation
comparison cloning_simulation.py @ 0:dc450979fcd4 draft
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
author | tduigou |
---|---|
date | Fri, 16 May 2025 09:39:03 +0000 |
parents | |
children | 3171db614963 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dc450979fcd4 |
---|---|
1 import os | |
2 import dnacauldron | |
3 from Bio import SeqIO | |
4 import pandas | |
5 import argparse | |
6 import zipfile | |
7 | |
def _parse_name_mapping(mapping):
    """Normalise a file-name mapping into ``{file stem: display name}``.

    ``mapping`` may be a dict, a ``"path:name,path:name"`` string, or an
    empty value (``None`` / ``""``), which yields an empty dict.
    """
    if not mapping:
        return {}
    if isinstance(mapping, str):
        # Split on the first ":" only, so a display name that itself contains
        # a colon no longer breaks the dict construction.
        mapping = dict(item.split(":", 1) for item in mapping.split(","))
    # NOTE(review): ``replace`` drops every ".gb" occurrence, not only a
    # trailing extension (e.g. "x.gbk" becomes "xk") -- kept as-is; confirm
    # whether only the suffix should be stripped.
    return {
        os.path.splitext(os.path.basename(path))[0]: name.replace(".gb", "")
        for path, name in mapping.items()
    }


def _append_directory_to_zip(directory, zip_path):
    """Append every file under ``directory`` to ``zip_path`` and list the archive."""
    with zipfile.ZipFile(zip_path, mode='a',
                         compression=zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(directory):
            for file_name in files:
                full_path = os.path.join(root, file_name)
                # Store entries relative to the report directory.
                zipf.write(full_path, os.path.relpath(full_path, directory))
        print("Files in the zip archive:")
        for info in zipf.infolist():
            print(info.filename)


def cloning_simulation(files_to_assembly, domesticated_list,
                       csv_file, assembly_type, topology,
                       file_name_mapping, file_name_mapping_dom,
                       use_file_names_as_id,
                       outdir_simulation, output_simulation, enzyme):
    """Simulate the assemblies of a plan with dnacauldron and zip the report.

    Args:
        files_to_assembly: Comma-separated paths of the part files to import.
        domesticated_list: Comma-separated paths of extra (domesticated)
            files to import; skipped when empty.
        csv_file: Path of the assembly-plan spreadsheet.
        assembly_type: Name of the dnacauldron assembly class to use, one of
            "Type2sRestrictionAssembly", "GibsonAssembly", "BASICAssembly",
            "BioBrickStandardAssembly", "OligoPairAnnealin",
            "LigaseCyclingReactionAssembly".
        topology: Topology passed to the record import.
        file_name_mapping: ``"path:name,..."`` string or dict mapping imported
            file paths to the names the records should carry.
        file_name_mapping_dom: Same as ``file_name_mapping``, for the
            domesticated files; its entries take precedence.
        use_file_names_as_id: Passed through to the record import.
        outdir_simulation: Directory the report is written to.
        output_simulation: Path of the zip archive the report files are
            appended to.
        enzyme: Enzyme assigned to every assembly of the plan; ``'auto'``
            (or an empty value) leaves the assemblies untouched.

    Returns:
        ``output_simulation``.

    Raises:
        ValueError: If ``assembly_type`` is not a supported name.
    """
    import shutil

    # Validate the requested assembly type before any file is touched.
    supported_assemblies = (
        "Type2sRestrictionAssembly",
        "GibsonAssembly",
        "BASICAssembly",
        "BioBrickStandardAssembly",
        "OligoPairAnnealin",
        "LigaseCyclingReactionAssembly",
    )
    if assembly_type not in supported_assemblies:
        raise ValueError(f"Unsupported assembly type: {assembly_type}")
    # Each supported name is also the attribute name of the class in dnacauldron.
    assembly_class = getattr(dnacauldron, assembly_type)

    files_to_assembly = files_to_assembly.split(',')

    repository = dnacauldron.SequenceRepository()
    repository.import_records(files=files_to_assembly,
                              use_file_names_as_ids=use_file_names_as_id,
                              topology=topology)
    if domesticated_list:
        domesticated_files = domesticated_list.split(',')
        repository.import_records(files=domesticated_files,
                                  use_file_names_as_ids=use_file_names_as_id,
                                  topology=topology)

    # Refine the real record name dict; both mappings are parsed the same way
    # and the domesticated entries override the part entries.
    real_names = _parse_name_mapping(file_name_mapping)
    real_names.update(_parse_name_mapping(file_name_mapping_dom))

    # Update the records: rename every imported part that has a mapped name and
    # re-key it in the collection (iterate over a copy, the dict is mutated).
    parts = repository.collections["parts"]
    for key, record in list(parts.items()):
        new_id = real_names.get(record.id)
        if new_id is None:
            continue
        record.id = new_id
        record.name = new_id
        record.description = new_id
        parts[new_id] = parts.pop(key)

    # Report plan rows that are identical apart from their first column; the
    # result of this check used to be computed and silently discarded.
    plan_rows = pandas.read_csv(csv_file, index_col=0, header=None)
    if plan_rows.duplicated().any():
        print("Warning: the assembly plan contains duplicated rows")

    # The plan is loaded from a path ending in ".csv". Copy rather than rename
    # so the caller's input file is preserved and a source located on another
    # filesystem does not make the step fail.
    new_csvname = "assambly.csv"
    if os.path.abspath(csv_file) != os.path.abspath(new_csvname):
        shutil.copyfile(csv_file, new_csvname)

    assembly_plan = dnacauldron.AssemblyPlan.from_spreadsheet(
        name="auto_from_filename",
        path=new_csvname,
        assembly_class=assembly_class
    )
    # Only override the enzyme when one was actually requested; a missing
    # value (None / "") is treated like 'auto'.
    if enzyme and enzyme != 'auto':
        for assembly in assembly_plan.assemblies:
            assembly.enzyme = enzyme

    simulation = assembly_plan.simulate(sequence_repository=repository)
    stats = simulation.compute_stats()
    print(stats)

    report_writer = dnacauldron.AssemblyReportWriter(
        include_mix_graphs=True,
        include_assembly_plots=True,
        show_overhangs_in_graph=True,
        annotate_parts_homologies=True,
        include_pdf_report=True,
    )
    simulation.write_report(outdir_simulation,
                            assembly_report_writer=report_writer)

    # Append report files to .dat (ZIP)
    _append_directory_to_zip(outdir_simulation, output_simulation)

    return output_simulation
116 | |
117 | |
def parse_command_line_args(argv=None):
    """Parse the command-line options of the cloning simulation script.

    Args:
        argv: Optional list of argument strings; when ``None`` (the default)
            argparse reads ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    parser = argparse.ArgumentParser(description="Cloning simulation")

    parser.add_argument("--parts_files", required=True,
                        help="List of GenBank files (Comma-separated)")
    parser.add_argument("--domesticated_seq", required=True,
                        help="output of domestication (GenBank list)")
    parser.add_argument("--assembly_csv", required=True,
                        help="csv assembly")
    parser.add_argument('--assembly_plan_name', type=str,
                        help='type of assembly')
    parser.add_argument('--topology', type=str,
                        help='"circular" or "linear"')
    parser.add_argument('--file_name_mapping', type=str,
                        help='Mapping of Galaxy filenames to original filenames')
    parser.add_argument('--file_name_mapping_dom', type=str,
                        help='Mapping of Galaxy filenames to original domestication filenames')
    # Any value other than "true" (case-insensitive) is parsed as False.
    parser.add_argument("--use_file_names_as_id",
                        type=lambda x: x.lower() == 'true', default=True,
                        help="Use file names as IDs (True/False)")
    parser.add_argument("--outdir_simulation", required=True,
                        help="dir output for cloning simulation results")
    parser.add_argument("--output_simulation", required=True,
                        help="zip output for cloning simulation results")
    # Default to the 'auto' sentinel so an omitted option does not reach the
    # simulation as None.
    parser.add_argument('--enzyme', type=str, default='auto',
                        help='enzyme to use ("auto" keeps the assembly default)')

    return parser.parse_args(argv)
145 | |
if __name__ == "__main__":
    # Script entry point: read the CLI options and hand them to the
    # simulation, naming each argument so the option-to-parameter mapping
    # is explicit.
    cli = parse_command_line_args()

    cloning_simulation(
        files_to_assembly=cli.parts_files,
        domesticated_list=cli.domesticated_seq,
        csv_file=cli.assembly_csv,
        assembly_type=cli.assembly_plan_name,
        topology=cli.topology,
        file_name_mapping=cli.file_name_mapping,
        file_name_mapping_dom=cli.file_name_mapping_dom,
        use_file_names_as_id=cli.use_file_names_as_id,
        outdir_simulation=cli.outdir_simulation,
        output_simulation=cli.output_simulation,
        enzyme=cli.enzyme,
    )