Mercurial > repos > tduigou > cloning_simulation
annotate cloning_simulation.py @ 0:dc450979fcd4 draft
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
author | tduigou |
---|---|
date | Fri, 16 May 2025 09:39:03 +0000 |
parents | |
children | 3171db614963 |
rev | line source |
---|---|
0
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
1 import os |
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
2 import dnacauldron |
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
3 from Bio import SeqIO |
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
4 import pandas |
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
5 import argparse |
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
6 import zipfile |
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
7 |
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
def _parse_name_mapping(mapping):
    """Build a ``{record id: real name}`` lookup from a file-name mapping.

    Args:
        mapping: Either a dict, or a string of comma-separated
            ``path:original_name`` pairs. ``None`` and ``""`` are accepted
            and give an empty lookup.

    Returns:
        A dict keyed by the base name of each path without its extension,
        whose values are the original names with ``".gb"`` removed.
    """
    if not mapping:
        return {}
    if isinstance(mapping, str):
        # Split on the first ':' only, so a name that itself contains ':'
        # does not break the pair; empty items (trailing comma) are skipped.
        mapping = dict(
            item.split(":", 1) for item in mapping.split(",") if item
        )
    # NOTE: str.replace removes every ".gb" occurrence, not only a trailing
    # extension; kept unchanged so existing record ids stay stable.
    return {
        os.path.splitext(os.path.basename(path))[0]: name.replace(".gb", "")
        for path, name in mapping.items()
    }


def cloning_simulation(files_to_assembly, domesticated_list,
                       csv_file, assembly_type, topology,
                       file_name_mapping, file_name_mapping_dom,
                       use_file_names_as_id,
                       outdir_simulation, output_simulation, enzyme):
    """Simulate an assembly plan with DnaCauldron and zip the report.

    Args:
        files_to_assembly: Comma-separated paths of the part files.
        domesticated_list: Comma-separated paths of extra (domesticated)
            part files; falsy to import none.
        csv_file: Path of the assembly plan spreadsheet (CSV).
        assembly_type: Name of the assembly class to use, one of
            ``Type2sRestrictionAssembly``, ``GibsonAssembly``,
            ``BASICAssembly``, ``BioBrickStandardAssembly``,
            ``OligoPairAnnealing`` (the historical spelling
            ``OligoPairAnnealin`` is still accepted) or
            ``LigaseCyclingReactionAssembly``.
        topology: Passed as ``topology`` to ``import_records``.
        file_name_mapping: Mapping of part file paths to original names
            (dict, or ``path:name`` pairs separated by commas).
        file_name_mapping_dom: Same as ``file_name_mapping`` for the
            domesticated files; ``None`` or ``""`` for no mapping.
        use_file_names_as_id: Passed as ``use_file_names_as_ids`` to
            ``import_records``.
        outdir_simulation: Directory in which the report is written.
        output_simulation: Path of the zip archive receiving the report
            files.
        enzyme: Enzyme assigned to every assembly of the plan; ``'auto'``
            (or a falsy value) leaves the assemblies untouched.

    Returns:
        ``output_simulation``, the path of the zip archive.

    Raises:
        ValueError: If ``assembly_type`` is not a supported name.
    """
    import shutil  # local import: not part of the file's top-level imports

    # Validate the assembly type first so that a bad value fails before any
    # file is read or written.
    assembly_class_names = {
        "Type2sRestrictionAssembly": "Type2sRestrictionAssembly",
        "GibsonAssembly": "GibsonAssembly",
        "BASICAssembly": "BASICAssembly",
        "BioBrickStandardAssembly": "BioBrickStandardAssembly",
        # Historical misspelling kept as an accepted input name.
        "OligoPairAnnealin": "OligoPairAnnealing",
        "OligoPairAnnealing": "OligoPairAnnealing",
        "LigaseCyclingReactionAssembly": "LigaseCyclingReactionAssembly",
    }
    try:
        assembly_class_name = assembly_class_names[assembly_type]
    except KeyError:
        raise ValueError(
            f"Unsupported assembly type: {assembly_type}"
        ) from None

    repository = dnacauldron.SequenceRepository()
    repository.import_records(files=files_to_assembly.split(','),
                              use_file_names_as_ids=use_file_names_as_id,
                              topology=topology)
    if domesticated_list:
        repository.import_records(files=domesticated_list.split(','),
                                  use_file_names_as_ids=use_file_names_as_id,
                                  topology=topology)

    # Record id -> real name; domesticated entries override part entries.
    real_names = _parse_name_mapping(file_name_mapping)
    real_names.update(_parse_name_mapping(file_name_mapping_dom))

    # Rename the imported records (and their keys in the repository).
    parts = repository.collections["parts"]
    for key, record in list(parts.items()):
        new_id = real_names.get(record.id)
        if new_id is None:
            continue
        record.id = new_id
        record.name = new_id
        record.description = new_id
        parts[new_id] = parts.pop(key)

    # The first column is the index, so duplicated() flags rows whose
    # remaining columns repeat an earlier row.
    plan_table = pandas.read_csv(csv_file, index_col=0, header=None)
    if plan_table.duplicated().any():
        print("Warning: some rows of the assembly plan are duplicated")

    assembly_class = getattr(dnacauldron, assembly_class_name)

    # The plan is loaded from a file named "assambly.csv". Copy rather than
    # rename: the caller's input file is preserved and the operation also
    # works when source and destination are on different filesystems.
    new_csvname = "assambly.csv"
    if os.path.abspath(csv_file) != os.path.abspath(new_csvname):
        shutil.copyfile(csv_file, new_csvname)

    assembly_plan = dnacauldron.AssemblyPlan.from_spreadsheet(
        name="auto_from_filename",
        path=new_csvname,
        assembly_class=assembly_class
    )
    if enzyme and enzyme != 'auto':
        for assembly in assembly_plan.assemblies:
            assembly.enzyme = enzyme

    simulation = assembly_plan.simulate(sequence_repository=repository)
    stats = simulation.compute_stats()
    print(stats)

    report_writer = dnacauldron.AssemblyReportWriter(
        include_mix_graphs=True,
        include_assembly_plots=True,
        show_overhangs_in_graph=True,
        annotate_parts_homologies=True,
        include_pdf_report=True,
    )
    simulation.write_report(outdir_simulation,
                            assembly_report_writer=report_writer)

    # Append report files to .dat (ZIP); archive names are relative to the
    # report directory.
    with zipfile.ZipFile(output_simulation, mode='a',
                         compression=zipfile.ZIP_DEFLATED) as zipf:
        for root, _dirs, files in os.walk(outdir_simulation):
            for file_name in files:
                full_path = os.path.join(root, file_name)
                arcname = os.path.relpath(full_path, outdir_simulation)
                zipf.write(full_path, arcname)
        print("Files in the zip archive:")
        for info in zipf.infolist():
            print(info.filename)

    return output_simulation
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
116 |
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
117 |
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
def parse_command_line_args():
    """Parse the command-line arguments of the cloning simulation script.

    Returns:
        argparse.Namespace with the attributes ``parts_files``,
        ``domesticated_seq``, ``assembly_csv``, ``assembly_plan_name``,
        ``topology``, ``file_name_mapping``, ``file_name_mapping_dom``,
        ``use_file_names_as_id``, ``outdir_simulation``,
        ``output_simulation`` and ``enzyme``.
    """
    parser = argparse.ArgumentParser(description="Cloning simulation")

    parser.add_argument("--parts_files", required=True,
                        help="List of GenBank files (Comma-separated)")
    parser.add_argument("--domesticated_seq", required=True,
                        help="output of domestication (GenBank list)")
    parser.add_argument("--assembly_csv", required=True,
                        help="csv assembly")
    parser.add_argument('--assembly_plan_name', type=str,
                        help='type of assembly')
    parser.add_argument('--topology', type=str,
                        help='"circular" or "linear"')
    parser.add_argument('--file_name_mapping', type=str,
                        help='Mapping of Galaxy filenames to original '
                             'filenames')
    parser.add_argument('--file_name_mapping_dom', type=str,
                        help='Mapping of Galaxy filenames to original '
                             'domestication filenames')
    # Only the string "true" (any case) is parsed as True.
    parser.add_argument("--use_file_names_as_id",
                        type=lambda x: x.lower() == 'true', default=True,
                        help="Use file names as IDs (True/False)")
    parser.add_argument("--outdir_simulation", required=True,
                        help="dir output for cloning simulation results")
    parser.add_argument("--output_simulation", required=True,
                        help="zip output for cloning simulation results")
    # Default to 'auto' so an omitted flag does not yield None.
    parser.add_argument('--enzyme', type=str, default='auto',
                        help='enzyme to use (default: auto)')

    return parser.parse_args()
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
145 |
dc450979fcd4
planemo upload for repository https://github.com/Edinburgh-Genome-Foundry/DnaCauldron/tree/master commit 6ae809b563b40bcdb6be2e74fe2a84ddad5484ae
tduigou
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: parse the command-line options, then hand them to
    # cloning_simulation positionally, in the order listed below.
    args = parse_command_line_args()

    simulation_inputs = (
        args.parts_files,
        args.domesticated_seq,
        args.assembly_csv,
        args.assembly_plan_name,
        args.topology,
        args.file_name_mapping,
        args.file_name_mapping_dom,
        args.use_file_names_as_id,
        args.outdir_simulation,
        args.output_simulation,
        args.enzyme,
    )
    cloning_simulation(*simulation_inputs)