Mercurial > repos > bgruening > create_tool_recommendation_model
annotate extract_workflow_connections.py @ 4:f0da532be419 draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
author | bgruening |
---|---|
date | Fri, 06 May 2022 09:04:44 +0000 |
parents | 50753817983a |
children | 9ec705bd11cb |
rev | line source |
---|---|
0
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
1 """ |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
2 Extract workflow paths from the tabular file containing |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
3 input and output tools |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
4 """ |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
5 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
6 import csv |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
7 import random |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
8 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
9 import utils |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
10 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
11 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
12 class ExtractWorkflowConnections: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
13 def __init__(self): |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
14 """ Init method. """ |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
15 |
2
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
16 def collect_standard_connections(self, row): |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
17 published = row[8] |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
18 deleted = row[9] |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
19 has_errors = row[10] |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
20 if published == "t" and deleted == "f" and has_errors == "f": |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
21 return True |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
22 return False |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
23 |
0
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
24 def read_tabular_file(self, raw_file_path): |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
25 """ |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
26 Read tabular file and extract workflow connections |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
27 """ |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
28 print("Reading workflows...") |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
29 workflows = {} |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
30 workflow_paths_dup = "" |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
31 workflow_parents = dict() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
32 workflow_paths = list() |
2
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
33 unique_paths = dict() |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
34 standard_connections = dict() |
4
f0da532be419
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents:
2
diff
changeset
|
35 with open(raw_file_path, "rt") as workflow_connections_file: |
f0da532be419
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents:
2
diff
changeset
|
36 workflow_connections = csv.reader(workflow_connections_file, delimiter="\t") |
0
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
37 for index, row in enumerate(workflow_connections): |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
38 wf_id = str(row[0]) |
4
f0da532be419
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents:
2
diff
changeset
|
39 in_tool = row[3].strip() |
f0da532be419
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents:
2
diff
changeset
|
40 out_tool = row[6].strip() |
0
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
41 if wf_id not in workflows: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
42 workflows[wf_id] = list() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
43 if out_tool and in_tool and out_tool != in_tool: |
2
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
44 workflows[wf_id].append((out_tool, in_tool)) |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
45 qc = self.collect_standard_connections(row) |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
46 if qc: |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
47 i_t = utils.format_tool_id(in_tool) |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
48 o_t = utils.format_tool_id(out_tool) |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
49 if i_t not in standard_connections: |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
50 standard_connections[i_t] = list() |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
51 if o_t not in standard_connections[i_t]: |
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
52 standard_connections[i_t].append(o_t) |
0
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
53 print("Processing workflows...") |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
54 wf_ctr = 0 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
55 for wf_id in workflows: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
56 wf_ctr += 1 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
57 workflow_parents[wf_id] = self.read_workflow(wf_id, workflows[wf_id]) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
58 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
59 for wf_id in workflow_parents: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
60 flow_paths = list() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
61 parents_graph = workflow_parents[wf_id] |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
62 roots, leaves = self.get_roots_leaves(parents_graph) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
63 for root in roots: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
64 for leaf in leaves: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
65 paths = self.find_tool_paths_workflow(parents_graph, root, leaf) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
66 # reverse the paths as they are computed from leaves to roots leaf |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
67 paths = [tool_path for tool_path in paths] |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
68 if len(paths) > 0: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
69 flow_paths.extend(paths) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
70 workflow_paths.extend(flow_paths) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
71 print("Workflows processed: %d" % wf_ctr) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
72 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
73 # remove slashes from the tool ids |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
74 wf_paths_no_slash = list() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
75 for path in workflow_paths: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
76 path_no_slash = [utils.format_tool_id(tool_id) for tool_id in path] |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
77 wf_paths_no_slash.append(path_no_slash) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
78 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
79 # collect duplicate paths |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
80 for path in wf_paths_no_slash: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
81 workflow_paths_dup += ",".join(path) + "\n" |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
82 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
83 # collect unique paths |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
84 unique_paths = list(workflow_paths_dup.split("\n")) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
85 unique_paths = list(filter(None, unique_paths)) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
86 random.shuffle(unique_paths) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
87 no_dup_paths = list(set(unique_paths)) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
88 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
89 print("Finding compatible next tools...") |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
90 compatible_next_tools = self.set_compatible_next_tools(no_dup_paths) |
2
50753817983a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents:
0
diff
changeset
|
91 return unique_paths, compatible_next_tools, standard_connections |
0
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
92 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
93 def set_compatible_next_tools(self, workflow_paths): |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
94 """ |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
95 Find next tools for each tool |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
96 """ |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
97 next_tools = dict() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
98 for path in workflow_paths: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
99 path_split = path.split(",") |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
100 for window in range(0, len(path_split) - 1): |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
101 current_next_tools = path_split[window: window + 2] |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
102 current_tool = current_next_tools[0] |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
103 next_tool = current_next_tools[1] |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
104 try: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
105 next_tools[current_tool] += "," + next_tool |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
106 except Exception: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
107 next_tools[current_tool] = next_tool |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
108 for tool in next_tools: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
109 next_tools[tool] = ",".join(list(set(next_tools[tool].split(",")))) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
110 return next_tools |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
111 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
112 def read_workflow(self, wf_id, workflow_rows): |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
113 """ |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
114 Read all connections for a workflow |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
115 """ |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
116 tool_parents = dict() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
117 for connection in workflow_rows: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
118 in_tool = connection[0] |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
119 out_tool = connection[1] |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
120 if out_tool not in tool_parents: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
121 tool_parents[out_tool] = list() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
122 if in_tool not in tool_parents[out_tool]: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
123 tool_parents[out_tool].append(in_tool) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
124 return tool_parents |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
125 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
126 def get_roots_leaves(self, graph): |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
127 roots = list() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
128 leaves = list() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
129 all_parents = list() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
130 for item in graph: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
131 all_parents.extend(graph[item]) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
132 all_parents = list(set(all_parents)) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
133 children = graph.keys() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
134 roots = list(set(all_parents).difference(set(children))) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
135 leaves = list(set(children).difference(set(all_parents))) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
136 return roots, leaves |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
137 |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
138 def find_tool_paths_workflow(self, graph, start, end, path=[]): |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
139 path = path + [end] |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
140 if start == end: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
141 return [path] |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
142 path_list = list() |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
143 if end in graph: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
144 for node in graph[end]: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
145 if node not in path: |
4
f0da532be419
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents:
2
diff
changeset
|
146 new_tools_paths = self.find_tool_paths_workflow( |
f0da532be419
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents:
2
diff
changeset
|
147 graph, start, node, path |
f0da532be419
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents:
2
diff
changeset
|
148 ) |
0
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
149 for tool_path in new_tools_paths: |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
150 path_list.append(tool_path) |
22ebbac136c7
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff
changeset
|
151 return path_list |