Mercurial > repos > bgruening > dotprep
annotate dotPrep.py @ 0:732267cab191 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
| author | bgruening |
|---|---|
| date | Wed, 03 Dec 2025 16:17:07 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
0
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
1 #! /usr/bin/env python |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
2 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
3 # Author: Maria Nattestad |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
4 # Email: maria.nattestad@gmail.com |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
5 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
6 import argparse |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
7 import gzip |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
8 import operator |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
9 import re |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
10 import time |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
11 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
12 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
13 import numpy as np |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
14 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
15 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
def run(args):
    """Drive the whole preparation pipeline for one delta file.

    Reads ``delta``, ``unique_length``, ``out`` and ``overview`` from *args*,
    then runs the stages in order: collect the alignments per query, decide
    which alignments to keep, write the filtered delta file, and finally
    build the index from the filtered alignments.

    Small unique alignments are always kept (the flag is fixed to ``True``).
    """
    delta_path = args.delta
    min_unique_length = args.unique_length
    out_name = args.out
    keep_small = True
    overview_limit = args.overview

    # Stage 1: one pass over the delta file to gather per-query intervals.
    headers_by_query, intervals_by_query = get_query_ref_combinations(delta_path)

    # Stage 2: choose the alignments to keep for every query.
    kept_alignments = calculate_uniqueness(
        headers_by_query,
        intervals_by_query,
        min_unique_length,
        keep_small,
    )

    # Stage 3: write out only the kept alignments.
    ref_lengths, fields_per_query = write_filtered_delta_file(
        delta_path, out_name, kept_alignments, min_unique_length, headers_by_query
    )

    # Stage 4: build the index files from what was written.
    index_for_dot(ref_lengths, fields_per_query, out_name, overview_limit)
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
38 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
39 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
def scrub(string):
    """Return *string* with every ",", "!", "~" and "#" replaced by "_"."""
    # One translation pass instead of four chained replace() calls.
    substitutions = str.maketrans(",!~#", "____")
    return string.translate(substitutions)
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
47 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
48 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
def get_query_ref_combinations(filename):
    """Read a delta file once and collect the query intervals of every alignment.

    The file is first tried as gzip; if that raises ``OSError`` it is reopened
    as plain text. The first two lines of the file are echoed to stdout and
    are not parsed further.

    Lines starting with ">" are treated as headers; the query name is the
    second whitespace-separated token of the header, passed through
    ``scrub``. Any other line with more than four fields is treated as an
    alignment, and the min/max of its third and fourth fields is recorded.

    Args:
        filename: path of the (optionally gzip-compressed) delta file.

    Returns:
        A tuple ``(header_lines_by_query, lines_by_query)`` of two dicts keyed
        by scrubbed query name. ``lines_by_query[q]`` is a list of
        ``(query_min, query_max)`` tuples; ``header_lines_by_query[q]`` holds,
        at the same index, the header line the alignment appeared under.
    """
    print("header from delta file:")
    f = None
    try:
        try:
            f = gzip.open(filename, "rt")
            first_line = f.readline()
        except OSError:
            # Not readable as gzip: release the gzip handle before falling
            # back to plain text so it is not leaked.
            if f is not None:
                f.close()
                f = None
            f = open(filename, "r")
            first_line = f.readline()
        print(first_line.strip())
        print(f.readline().strip())

        linecounter = 0
        current_query_name = ""
        current_header = ""
        lines_by_query = {}
        header_lines_by_query = {}
        before = time.time()

        for line in f:
            if line.startswith(">"):
                linecounter += 1
                current_header = line.strip()
                current_query_name = scrub(current_header.split()[1])

                if current_query_name not in header_lines_by_query:
                    lines_by_query[current_query_name] = []
                    header_lines_by_query[current_query_name] = []
            else:
                fields = line.split()
                if len(fields) > 4:
                    query_start = int(fields[2])
                    query_end = int(fields[3])
                    # Store the interval orientation-independently.
                    lines_by_query[current_query_name].append(
                        (min(query_start, query_end), max(query_start, query_end))
                    )
                    header_lines_by_query[current_query_name].append(current_header)
    finally:
        # Close the file even when parsing raises part-way through.
        if f is not None:
            f.close()

    print(
        "First read through the file: %d seconds for %d query-reference combinations"
        % (time.time() - before, linecounter)
    )
    return header_lines_by_query, lines_by_query
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
89 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
90 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
def calculate_uniqueness(header_lines_by_query, lines_by_query, unique_length, keep_small_uniques):
    """Decide, query by query, which alignments to keep.

    Calls ``summarize_planesweep`` on each query's list of intervals and
    collects the results, printing progress and timing to stdout.

    Args:
        header_lines_by_query: not used by this function; kept so existing
            callers do not need to change.
        lines_by_query: dict mapping query name to a list of
            ``(query_min, query_max)`` tuples.
        unique_length: forwarded to ``summarize_planesweep`` as
            ``unique_length_required``.
        keep_small_uniques: forwarded to ``summarize_planesweep`` unchanged.

    Returns:
        dict mapping each query name to the value ``summarize_planesweep``
        returned for it.
    """
    before = time.time()
    unique_alignments = {}
    num_queries = len(lines_by_query)
    print("Filtering alignments of %d queries" % num_queries)

    # Floor division keeps the step an integer. With true division the step
    # became a float (e.g. 1.5 for 150 queries) and the modulo test below
    # fired erratically, so progress was reported at irregular intervals.
    num_query_step_to_report = max(num_queries // 100, 1)

    for query_counter, (query, query_lines) in enumerate(lines_by_query.items(), start=1):
        unique_alignments[query] = summarize_planesweep(
            query_lines,
            unique_length_required=unique_length,
            keep_small_uniques=keep_small_uniques,
        )
        if query_counter % num_query_step_to_report == 0:
            print("Progress: %d%%" % (query_counter * 100 // num_queries))

    print("Progress: 100%")
    print(
        "Deciding which alignments to keep: %d seconds for %d queries"
        % (time.time() - before, num_queries)
    )
    return unique_alignments
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
116 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
117 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
118 def summarize_planesweep(lines, unique_length_required, keep_small_uniques=False): |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
119 unique_alignments = [] |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
120 if len(lines) == 0: |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
121 return [] |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
122 if len(lines) == 1: |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
123 if keep_small_uniques or abs(lines[0][1] - lines[0][0]) >= unique_length_required: |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
124 return [0] |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
125 return [] |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
126 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
127 starts_and_stops = [] |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
128 for query_min, query_max in lines: |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
129 starts_and_stops.append((query_min, "start")) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
130 starts_and_stops.append((query_max, "stop")) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
131 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
132 sorted_starts_and_stops = sorted(starts_and_stops, key=operator.itemgetter(0)) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
133 current_coverage = 0 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
134 last_position = -1 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
135 sorted_unique_intervals_left = [] |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
136 sorted_unique_intervals_right = [] |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
137 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
138 for pos, change in sorted_starts_and_stops: |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
139 if current_coverage == 1: |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
140 sorted_unique_intervals_left.append(last_position) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
141 sorted_unique_intervals_right.append(pos) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
142 if change == "start": |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
143 current_coverage += 1 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
144 else: |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
145 current_coverage -= 1 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
146 last_position = pos |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
147 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
148 linecounter = 0 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
149 for query_min, query_max in lines: |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
150 i = binary_search(query_min, sorted_unique_intervals_left, 0, len(sorted_unique_intervals_left)) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
151 exact_match = False |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
152 if i < len(sorted_unique_intervals_left): |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
153 if ( |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
154 sorted_unique_intervals_left[i] == query_min |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
155 and sorted_unique_intervals_right[i] == query_max |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
156 ): |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
157 exact_match = True |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
158 sum_uniq = 0 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
159 while ( |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
160 i < len(sorted_unique_intervals_left) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
161 and sorted_unique_intervals_left[i] >= query_min |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
162 and sorted_unique_intervals_right[i] <= query_max |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
163 ): |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
164 sum_uniq += sorted_unique_intervals_right[i] - sorted_unique_intervals_left[i] |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
165 i += 1 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
166 if sum_uniq >= unique_length_required: |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
167 unique_alignments.append(linecounter) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
168 elif keep_small_uniques and exact_match: |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
169 unique_alignments.append(linecounter) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
170 linecounter += 1 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
171 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
172 return unique_alignments |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
173 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
174 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
def binary_search(query, numbers, left, right):
    """Search the half-open index window [left, right) of ``numbers`` for ``query``.

    ``numbers`` is expected to be sorted in ascending order within the window.

    Returns the index of an element equal to ``query`` when one is hit;
    otherwise returns the upper bound of the window at the moment it
    becomes empty (an index in ``left..right``, possibly ``right`` itself,
    so callers must bounds-check before indexing with the result).
    """
    lo, hi = left, right
    while lo < hi:
        mid = int((hi + lo) / 2)
        candidate = numbers[mid]
        if query == candidate:
            return mid
        if query < candidate:
            # Keep searching the lower half; mid itself is excluded.
            hi = mid
        else:
            # Keep searching the upper half.
            lo = mid + 1
    return hi
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
184 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
185 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
def natural_key(string_):
    """Return a sort key that compares embedded digit runs numerically.

    The string is split on runs of digits; each digit run becomes an ``int``
    and the text between runs is kept as ``str``, e.g. ``"chr10"`` ->
    ``["chr", 10, ""]``. Used as a ``key=`` function this sorts ``chr2``
    before ``chr10``.
    """
    # Because the pattern has a capturing group, re.split returns
    # [text, digits, text, digits, ..., text]: the digit runs are exactly the
    # odd-indexed pieces. Classifying by position instead of str.isdigit()
    # avoids calling int() on text such as superscript digits, which
    # isdigit() accepts but \d does not match and int() rejects.
    pieces = re.split(r"(\d+)", string_)
    return [int(piece) if index % 2 else piece for index, piece in enumerate(pieces)]
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
188 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
189 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
def write_filtered_delta_file(filename, output_filename, unique_alignments, unique_length, header_lines_by_query):
    """Write the filtered delta file and collect per-alignment records.

    Reads the delta file ``filename`` (tried as gzip first, then as plain
    text) and writes
    ``{output_filename}.uniqueAnchorFiltered_l{unique_length}.delta.gz``
    containing the two leading header lines, every ``>`` header line that
    matches a header recorded for one of the query's kept alignments, and
    every kept alignment together with the lines that follow it.

    Args:
        filename: Path of the input delta file (gzip-compressed or plain).
        output_filename: Prefix for the output file name.
        unique_alignments: Mapping from scrubbed query name to the indices of
            the alignments to keep; an alignment's index is its running count
            among all alignments of that query in the file.
        unique_length: Value embedded in the output file name.
        header_lines_by_query: Mapping from scrubbed query name to a
            container, indexed by alignment index, holding the stripped
            ``>`` header line that alignment was found under.

    Returns:
        A ``(reference_lengths, fields_by_query)`` tuple.
        ``reference_lengths`` is a list of ``(reference_name, size)`` tuples
        in first-seen order. ``fields_by_query`` maps each query name to a
        list of records ``[ref_start, ref_end, query_start, query_end,
        reference_size, query_size, reference_name, query_name, tag]`` where
        ``tag`` is ``"unique"`` for kept alignments and ``"repetitive"``
        otherwise.

    Raises:
        OSError: If the input cannot be opened or the output cannot be
            written.
        KeyError: If a query in the file is missing from
            ``unique_alignments`` (or from ``header_lines_by_query`` when it
            has kept alignments).
    """
    before = time.time()
    out_path = f"{output_filename}.uniqueAnchorFiltered_l{unique_length}.delta.gz"

    linecounter = 0

    # State for the filtered delta output.
    list_of_unique_alignments = []
    kept_indices = set()
    alignment_counter = {}
    keep_printing = False

    # State for the returned index / coords information.
    ref_sequences = set()
    reference_lengths = []
    fields_by_query = {}

    # The context manager / finally pair guarantees both handles are closed
    # even when parsing fails part-way through the file.
    with gzip.open(out_path, "wt") as f_out_delta:
        f = None
        try:
            try:
                f = gzip.open(filename, "rt")
                header1 = f.readline()
            except OSError:
                # Not gzip-compressed (or unreadable as such): release the
                # gzip reader and fall back to plain text.
                if f is not None:
                    f.close()
                    f = None
                f = open(filename, "r")
                header1 = f.readline()

            # Copy the two leading header lines through unchanged.
            f_out_delta.write(header1)
            f_out_delta.write(f.readline())

            for line in f:
                linecounter += 1
                stripped = line.strip()
                if line[0] == ">":
                    fields = stripped.split()

                    # For the delta output.
                    query = scrub(fields[1])
                    list_of_unique_alignments = unique_alignments[query]
                    # Set copy gives O(1) membership tests per alignment line.
                    kept_indices = set(list_of_unique_alignments)

                    # Emit the header only if a kept alignment was recorded
                    # under this exact header line.
                    header_needed = any(
                        stripped == header_lines_by_query[query][index]
                        for index in list_of_unique_alignments
                    )
                    if header_needed:
                        f_out_delta.write(line)

                    # The counter keeps running across repeated headers of
                    # the same query; only initialise it on first sight.
                    alignment_counter[query] = alignment_counter.get(query, 0)

                    # For the coords records.
                    current_reference_name = scrub(fields[0][1:])
                    current_query_name = scrub(fields[1])
                    current_reference_size = int(fields[2])
                    current_query_size = int(fields[3])

                    if current_reference_name not in ref_sequences:
                        reference_lengths.append((current_reference_name, current_reference_size))
                        ref_sequences.add(current_reference_name)
                else:
                    fields = stripped.split()
                    if len(fields) > 4:
                        # Alignment line: the first four fields are the
                        # reference and query coordinates.
                        ref_start = int(fields[0])
                        ref_end = int(fields[1])
                        query_start = int(fields[2])
                        query_end = int(fields[3])

                        if alignment_counter[query] in kept_indices:
                            f_out_delta.write(line)
                            csv_tag = "unique"
                            keep_printing = True
                        else:
                            csv_tag = "repetitive"
                            keep_printing = False

                        record = [
                            ref_start,
                            ref_end,
                            query_start,
                            query_end,
                            current_reference_size,
                            current_query_size,
                            current_reference_name,
                            current_query_name,
                            csv_tag,
                        ]
                        fields_by_query.setdefault(current_query_name, []).append(record)
                        alignment_counter[query] += 1
                    elif keep_printing:
                        # Lines following a kept alignment belong to it and
                        # are copied through.
                        f_out_delta.write(line)
        finally:
            if f is not None:
                f.close()

    print(
        "Writing filtered delta file and capturing information for coords file: "
        "%d seconds for %d total lines in file"
        % (time.time() - before, linecounter)
    )
    return reference_lengths, fields_by_query
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
283 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
284 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
def index_for_dot(reference_lengths, fields_by_query, output_prefix, max_overview_alignments):
    """Write the ``.coords`` and ``.coords.idx`` files for Dot.

    Two files are produced:

    * ``<output_prefix>.coords`` -- for every query, a ``!<query>!unique``
      section followed by a ``!<query>!repetitive`` section, each listing the
      alignments carrying that tag as
      ``ref_start,ref_end,query_start,query_end,ref``.
    * ``<output_prefix>.coords.idx`` -- a ``#ref`` table, a ``#query`` table
      (holding the offsets into the coords file) and an ``#overview`` table
      with the longest alignments.

    Args:
        reference_lengths: list of ``(ref_name, ref_length)`` pairs. It is
            sorted IN PLACE by ``natural_key(ref_name)``.
        fields_by_query: mapping of query name to a list of alignment records.
            The indices read here are 0/1 (reference start/end), 2/3 (query
            start/end), 5 (query length), 6 (reference name) and 8 (tag,
            compared against "unique"/"repetitive"). For queries that are
            flipped, items 2 and 3 of each record are rewritten IN PLACE.
        output_prefix: path prefix for the two output files.
        max_overview_alignments: upper bound on the number of rows written
            under ``#overview``.

    Returns:
        None. The results are the two files written to disk.
    """
    # In-place sort: the caller's list ends up in natural order as well.
    reference_lengths.sort(key=lambda x: natural_key(x[0]))

    # Offset of each reference on a concatenated axis; used only to rank
    # queries by where their unique alignments fall.
    cumulative_sum = 0
    ref_chrom_offsets = {}
    queries_by_reference = {}
    for ref_name, ref_length in reference_lengths:
        ref_chrom_offsets[ref_name] = cumulative_sum
        cumulative_sum += ref_length
        queries_by_reference[ref_name] = set()

    flip_by_query = {}
    unique_references_by_query = {}
    all_references_by_query = {}
    relative_ref_position_by_query = []
    ordered_tags = ["unique", "repetitive"]

    query_byte_positions = {}
    query_lengths = {}
    all_alignments = []

    # Context manager so the handle is released even if a record is malformed.
    with open(f"{output_prefix}.coords", "w", encoding="utf-8") as f_out_coords:
        f_out_coords.write("ref_start,ref_end,query_start,query_end,ref\n")

        for query_name, lines in fields_by_query.items():
            sum_forward = 0
            sum_reverse = 0
            ref_position_scores = []
            unique_references_by_query[query_name] = set()
            all_references_by_query[query_name] = set()

            # Pass 1: gather per-query statistics. Only "unique" alignments
            # vote on orientation and on the query's position along the
            # reference axis.
            for fields in lines:
                tag = fields[8]
                query_lengths[query_name] = int(fields[5])
                ref_name = fields[6]
                all_references_by_query[query_name].add(ref_name)
                if tag != "unique":
                    continue

                query_start = int(fields[2])
                query_stop = int(fields[3])
                ref_start = int(fields[0])
                ref_stop = int(fields[1])
                alignment_length = abs(query_stop - query_start)

                unique_references_by_query[query_name].add(ref_name)
                queries_by_reference[ref_name].add(query_name)
                ref_position_scores.append(
                    ref_chrom_offsets[ref_name] + (ref_start + ref_stop) / 2
                )
                if query_stop < query_start:
                    sum_reverse += alignment_length
                else:
                    sum_forward += alignment_length

            # Flip the query when most of its unique aligned length is reversed.
            flip = sum_reverse > sum_forward
            flip_by_query[query_name] = "-" if flip else "+"

            # Pass 2: emit the two tagged sections and remember where each starts.
            # NOTE(review): tell() on a text-mode file is documented as an
            # opaque value; it is used here as a byte offset into the UTF-8
            # output -- confirm this matches what the index consumer expects.
            for tag in ordered_tags:
                query_byte_positions[(query_name, tag)] = f_out_coords.tell()
                f_out_coords.write(f"!{query_name}!{tag}\n")
                for fields in lines:
                    if fields[8] != tag:
                        continue
                    if flip:
                        # Mirror query coordinates around the query length
                        # (mutates the caller's record).
                        fields[2] = int(fields[5]) - int(fields[2])
                        fields[3] = int(fields[5]) - int(fields[3])
                    output_fields = [fields[0], fields[1], fields[2], fields[3], fields[6]]
                    f_out_coords.write(",".join(str(i) for i in output_fields) + "\n")
                    alignment_length = abs(int(fields[3]) - int(fields[2]))
                    all_alignments.append((fields, alignment_length))

            # The query's block ends right after its repetitive section.
            # Recording it here (once, after both sections) avoids the previous
            # behaviour where the "end" of the preceding query was overwritten
            # during the second tag pass and ended up pointing into the current
            # query's block.
            query_byte_positions[(query_name, "end")] = f_out_coords.tell()

            rel_pos = np.median(ref_position_scores) if ref_position_scores else 0
            relative_ref_position_by_query.append((query_name, rel_pos))

    # Queries are listed in the index ordered by the median position of their
    # unique alignments (0 for queries without any).
    relative_ref_position_by_query.sort(key=lambda x: x[1])

    with open(f"{output_prefix}.coords.idx", "w", encoding="utf-8") as f_out_index:
        f_out_index.write("#ref\n")
        f_out_index.write("ref,ref_length,matching_queries\n")
        for ref_name, ref_length in reference_lengths:
            # sorted(): set iteration order of strings varies between runs,
            # which made the index file non-reproducible.
            matching_queries = "~".join(sorted(queries_by_reference[ref_name]))
            f_out_index.write(f"{ref_name},{ref_length},{matching_queries}\n")

        f_out_index.write("#query\n")
        f_out_index.write(
            "query,query_length,orientation,bytePosition_unique,"
            "bytePosition_repetitive,bytePosition_end,unique_matching_refs,matching_refs\n"
        )

        for query_name, _ in relative_ref_position_by_query:
            unique_pos = query_byte_positions[(query_name, "unique")]
            repetitive_pos = query_byte_positions[(query_name, "repetitive")]
            end_pos = query_byte_positions[(query_name, "end")]
            unique_refs = "~".join(sorted(unique_references_by_query[query_name]))
            all_refs = "~".join(sorted(all_references_by_query[query_name]))
            # First offset is absolute; the next two columns are the lengths of
            # the unique and repetitive sections respectively.
            f_out_index.write(
                f"{query_name},{query_lengths[query_name]},{flip_by_query[query_name]},"
                f"{unique_pos},"
                f"{repetitive_pos - unique_pos},"
                f"{end_pos - repetitive_pos},"
                f"{unique_refs},"
                f"{all_refs}\n"
            )

        f_out_index.write("#overview\n")
        f_out_index.write("ref_start,ref_end,query_start,query_end,ref,query,tag\n")

        num_overview_alignments = min(max_overview_alignments, len(all_alignments))
        if num_overview_alignments < len(all_alignments):
            print(
                f"Included the longest {max_overview_alignments} alignments in the index "
                f"under #overview (change this with the --overview parameter), "
                f"out of a total of {len(all_alignments)} alignments."
            )

        # Longest alignments first; the sort is stable, so ties keep write order.
        all_alignments.sort(key=lambda x: -x[1])
        # NOTE(review): up to nine items per record are written under a
        # seven-column header -- confirm the record layout produced by the
        # caller matches what the overview reader expects.
        for fields, _ in all_alignments[:num_overview_alignments]:
            f_out_index.write(",".join(str(i) for i in fields[:9]) + "\n")
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
403 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
404 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
405 def main(): |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
406 parser = argparse.ArgumentParser( |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
407 description="Take a delta file, apply Assemblytics unique anchor filtering, " |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
408 "and prepare coordinates input files for Dot" |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
409 ) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
410 parser.add_argument("--delta", help="delta file", dest="delta", type=str, required=True) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
411 parser.add_argument("--out", help="output file", dest="out", type=str, default="output") |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
412 parser.add_argument( |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
413 "--unique-length", |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
414 help="Minimum unique query sequence length required (default: 10000)", |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
415 dest="unique_length", |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
416 type=int, |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
417 default=10000, |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
418 ) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
419 parser.add_argument( |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
420 "--overview", |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
421 help="Number of alignments to include in coords.idx overview (default: 1000)", |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
422 dest="overview", |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
423 type=int, |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
424 default=1000, |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
425 ) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
426 parser.set_defaults(func=run) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
427 args = parser.parse_args() |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
428 args.func(args) |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
429 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
430 |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
431 if __name__ == "__main__": |
|
732267cab191
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/dotprep commit 6db87bb19217d256b13cd66810043b667d1c7638
bgruening
parents:
diff
changeset
|
432 main() |
