Mercurial > repos > eduardo > annotateviz
comparison jbrowse.py @ 0:7537482eed36 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/annotateviz commit 9bbaa3eacc76ff3bf2b6da313cc0d85705f15dd1-dirty
author | eduardo |
---|---|
date | Sat, 17 Jun 2017 13:31:06 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7537482eed36 |
---|---|
1 #!/usr/bin/env python | |
2 import argparse | |
3 import copy | |
4 import hashlib | |
5 import json | |
6 import logging | |
7 import os | |
8 import shutil | |
9 import struct | |
10 import subprocess | |
11 import tempfile | |
12 import xml.etree.ElementTree as ET | |
13 from collections import defaultdict | |
14 | |
15 from Bio.Data import CodonTable | |
16 | |
# Configure the root logger at import time with INFO as the threshold, then
# create the module-wide logger used by every class and function below.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger('jbrowse')
19 | |
20 | |
class ColorScaling(object):
    """Build the colour and menu parts of a JBrowse track configuration.

    The class hands out colours from a fixed palette (one per call, cycling)
    and renders small JavaScript callbacks, stored as strings, that JBrowse
    evaluates per feature to derive an ``rgba(...)`` colour from its score.
    """

    # JS callback used for BLAST tracks: fixed RGB, score-driven opacity.
    # Braces are doubled because the template goes through ``str.format``.
    COLOR_FUNCTION_TEMPLATE = """
    function(feature, variableName, glyphObject, track) {{
        var score = {score};
        {opacity}
        return 'rgba({red}, {green}, {blue}, ' + opacity + ')';
    }}
    """

    # JS callback used for gene-call tracks: the colour and the score are
    # looked up on the feature, then on its ancestors, then on its
    # descendants.  ``\\d`` is written with an escaped backslash so the
    # runtime string still contains ``\d`` without relying on an invalid
    # Python escape sequence.
    COLOR_FUNCTION_TEMPLATE_QUAL = """
    function(feature, variableName, glyphObject, track) {{
        var search_up = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.parent() === undefined) {{
                return;
            }}else{{
                return self(sf.parent(), attr);
            }}
        }};

        var search_down = function self(sf, attr){{
            if(sf.get(attr) !== undefined){{
                return sf.get(attr);
            }}
            if(sf.children() === undefined) {{
                return;
            }}else{{
                var kids = sf.children();
                for(var child_idx in kids){{
                    var x = self(kids[child_idx], attr);
                    if(x !== undefined){{
                        return x;
                    }}
                }}
                return;
            }}
        }};

        var color = ({user_spec_color} || search_up(feature, 'color') || search_down(feature, 'color') || {auto_gen_color});
        var score = (search_up(feature, 'score') || search_down(feature, 'score'));
        {opacity}
        var result = /^#?([a-f\\d]{{2}})([a-f\\d]{{2}})([a-f\\d]{{2}})$/i.exec(color);
        var red = parseInt(result[1], 16);
        var green = parseInt(result[2], 16);
        var blue = parseInt(result[3], 16);
        if(isNaN(opacity) || opacity < 0){{ opacity = 0; }}
        return 'rgba(' + red + ',' + green + ',' + blue + ',' + opacity + ')';
    }}
    """

    # JS snippets that compute ``opacity`` from ``score``.  'linear' and
    # 'logarithmic' carry ``{min}``/``{max}`` placeholders; 'blast' contains
    # literal JS braces and therefore must never be passed to ``str.format``.
    OPACITY_MATH = {
        'linear': """
            var opacity = (score - ({min})) / (({max}) - ({min}));
        """,
        'logarithmic': """
            var opacity = (score - ({min})) / (({max}) - ({min}));
            opacity = Math.log10(opacity) + Math.log10({max});
        """,
        'blast': """
            var opacity = 0;
            if(score == 0.0) {
                opacity = 1;
            } else{
                opacity = (20 - Math.log10(score)) / 180;
            }
        """
    }

    # Kept for backward compatibility; instances track their own index.
    BREWER_COLOUR_IDX = 0
    # RGB triples handed out in order by ``_get_colours``.
    BREWER_COLOUR_SCHEMES = [
        (166, 206, 227),
        (31, 120, 180),
        (178, 223, 138),
        (51, 160, 44),
        (251, 154, 153),
        (227, 26, 28),
        (253, 191, 111),
        (255, 127, 0),
        (202, 178, 214),
        (106, 61, 154),
        (255, 255, 153),
        (177, 89, 40),
        (228, 26, 28),
        (55, 126, 184),
        (77, 175, 74),
        (152, 78, 163),
        (255, 127, 0),
    ]

    # Pairs of hex colours keyed by palette name (not referenced by the
    # methods of this class).
    BREWER_DIVERGING_PALLETES = {
        'BrBg': ("#543005", "#003c30"),
        'PiYg': ("#8e0152", "#276419"),
        'PRGn': ("#40004b", "#00441b"),
        'PuOr': ("#7f3b08", "#2d004b"),
        'RdBu': ("#67001f", "#053061"),
        'RdGy': ("#67001f", "#1a1a1a"),
        'RdYlBu': ("#a50026", "#313695"),
        'RdYlGn': ("#a50026", "#006837"),
        'Spectral': ("#9e0142", "#5e4fa2"),
    }

    def __init__(self):
        # Position of the next palette colour to hand out.
        self.brewer_colour_idx = 0

    def rgb_from_hex(self, hexstr):
        """Convert a six-digit hex string (no leading '#') to an (r, g, b) tuple.

        ``bytearray.fromhex`` replaces the Python-2-only ``str.decode('hex')``
        so the method works on both Python 2.7 and Python 3.

        Raises:
            ValueError: if ``hexstr`` contains non-hexadecimal characters.
            struct.error: if ``hexstr`` does not describe exactly three bytes.
        """
        # http://stackoverflow.com/questions/4296249/how-do-i-convert-a-hex-triplet-to-an-rgb-tuple-and-back
        return struct.unpack('BBB', bytes(bytearray.fromhex(hexstr)))

    def min_max_gff(self, gff_file):
        """Return ``(min, max)`` of the score column (6th field) of a GFF file.

        Lines whose sixth tab-separated field is missing or not a number
        (comments, directives, '.' scores) are skipped.  Returns
        ``(None, None)`` when no line carries a usable score.
        """
        min_val = None
        max_val = None
        with open(gff_file, 'r') as handle:
            for line in handle:
                try:
                    value = float(line.split('\t')[5])
                except (IndexError, ValueError):
                    continue
                if value != value:
                    # NaN never compares; it would poison both bounds.
                    continue
                # Compare against None explicitly: a bound of 0.0 is falsy
                # and must not be mistaken for "not set yet".
                if min_val is None or value < min_val:
                    min_val = value
                if max_val is None or value > max_val:
                    max_val = value
        return min_val, max_val

    def hex_from_rgb(self, r, g, b):
        """Format three integer channels as a lowercase '#rrggbb' string."""
        return '#%02x%02x%02x' % (r, g, b)

    def _get_colours(self):
        """Return the next palette colour as (r, g, b), cycling when exhausted."""
        palette = self.BREWER_COLOUR_SCHEMES
        r, g, b = palette[self.brewer_colour_idx % len(palette)]
        self.brewer_colour_idx += 1
        return r, g, b

    def parse_menus(self, track):
        """Translate the ``menus`` section of a track's options into menuTemplate.

        ``track['menus']['menu']`` may be a single dict or a list of dicts;
        each needs an ``action`` key, and ``label``, ``iconClass``, ``url``,
        ``content`` and ``title`` are optional.  A missing or empty ``menus``
        section yields only the three leading placeholders.
        """
        # Three empty entries always precede the user-defined ones
        # (presumably to leave JBrowse's default menu items untouched --
        # TODO confirm against the JBrowse menuTemplate documentation).
        trackConfig = {'menuTemplate': [{}, {}, {}]}

        # An empty <menus/> element is parsed to None or '', and the key may
        # be absent altogether; treat all of these as "no custom menus".
        menus = track.get('menus') or {}
        if isinstance(menus, dict) and 'menu' in menus:
            menu_list = menus['menu']
            if not isinstance(menu_list, list):
                menu_list = [menu_list]

            for m in menu_list:
                tpl = {
                    'action': m['action'],
                    'label': m.get('label', '{name}'),
                    'iconClass': m.get('iconClass', 'dijitIconBookmark'),
                }
                for optional in ('url', 'content', 'title'):
                    if optional in m:
                        tpl[optional] = m[optional]

                trackConfig['menuTemplate'].append(tpl)

        return trackConfig

    def parse_colours(self, track, trackFormat, gff3=None):
        """Build the colour-related part of a track configuration.

        Args:
            track: the track's options dict; reads ``wiggle`` for wiggle
                tracks and ``scaling`` for everything else.
            trackFormat: 'wiggle', 'blast' or 'gene_calls'; other values only
                receive a colour when scaling is set to 'ignore'.
            gff3: path of the GFF3 file scanned for score bounds when
                automatic scaling is requested for gene calls.

        Returns:
            A dict with a ``style`` sub-dict and, for wiggle tracks, a
            ``bicolor_pivot`` entry.
        """
        trackConfig = {'style': {}}
        style = trackConfig['style']

        # Wiggle tracks have a bicolor palette
        if trackFormat == 'wiggle':
            style['pos_color'] = track['wiggle']['color_pos']
            style['neg_color'] = track['wiggle']['color_neg']

            if style['pos_color'] == '__auto__':
                # Order matters: the negative colour takes the next palette
                # entry, the positive colour the one after it.
                style['neg_color'] = self.hex_from_rgb(*self._get_colours())
                style['pos_color'] = self.hex_from_rgb(*self._get_colours())

            # Wiggle tracks can change colour at a specified place
            bc_pivot = track['wiggle']['bicolor_pivot']
            if bc_pivot not in ('mean', 'zero'):
                # The values are either one of those two strings
                # or a number
                bc_pivot = float(bc_pivot)
            trackConfig['bicolor_pivot'] = bc_pivot
        elif 'scaling' in track:
            scaling = track['scaling']
            if scaling['method'] == 'ignore':
                if scaling['scheme']['color'] != '__auto__':
                    style['color'] = scaling['scheme']['color']
                else:
                    style['color'] = self.hex_from_rgb(*self._get_colours())
            elif trackFormat == 'blast':
                red, green, blue = self._get_colours()
                color_function = self.COLOR_FUNCTION_TEMPLATE.format(**{
                    'score': "feature._parent.get('score')",
                    'opacity': self.OPACITY_MATH['blast'],
                    'red': red,
                    'green': green,
                    'blue': blue,
                })
                style['color'] = color_function.replace('\n', '')
            elif trackFormat == 'gene_calls':
                # linear, logarithmic, blast
                algo = scaling['algo']
                # type __auto__, manual (min, max)
                scales = scaling['scales']
                # scheme -> (type (opacity), color)
                scheme = scaling['scheme']

                # Default values, based on GFF3 spec
                min_val = 0
                max_val = 1000
                # Get min/max and build a scoring function since JBrowse doesn't
                if scales['type'] in ('automatic', '__auto__'):
                    if gff3 is not None:
                        found_min, found_max = self.min_max_gff(gff3)
                        # A file without scores keeps the defaults rather
                        # than injecting "None" into the generated JS.
                        if found_min is not None:
                            min_val, max_val = found_min, found_max
                else:
                    min_val = scales.get('min', 0)
                    max_val = scales.get('max', 1000)

                if scheme['color'].startswith('#'):
                    # Round-trip through RGB to validate and normalise.
                    user_color = "'%s'" % self.hex_from_rgb(*self.rgb_from_hex(scheme['color'][1:]))
                    auto_color = 'undefined'
                else:
                    # '__auto__' and anything that is not a hex colour.
                    user_color = 'undefined'
                    auto_color = "'%s'" % self.hex_from_rgb(*self._get_colours())

                # Plain substitution instead of str.format: the 'blast'
                # snippet holds literal JS braces that format() rejects.
                opacity_js = (
                    self.OPACITY_MATH[algo]
                    .replace('{min}', str(min_val))
                    .replace('{max}', str(max_val))
                )

                color_function = self.COLOR_FUNCTION_TEMPLATE_QUAL.format(**{
                    'opacity': opacity_js,
                    'user_spec_color': user_color,
                    'auto_gen_color': auto_color,
                })

                style['color'] = color_function.replace('\n', '')
        return trackConfig
259 | |
260 | |
def etree_to_dict(t):
    """Recursively convert an ElementTree element into nested dicts.

    The result is ``{tag: value}`` where ``value`` is:

    * ``None`` for an element with no children, attributes or text;
    * the stripped text for a leaf element that only has text;
    * a dict otherwise, holding one entry per child tag (a list when the
      tag repeats, the bare value when it occurs once), attributes under
      ``'@name'`` keys and non-empty text under ``'#text'``.

    Uses ``dict.items()`` rather than the Python-2-only ``iteritems()`` so
    the function runs unchanged on Python 2 and Python 3.
    """
    d = {t.tag: {} if t.attrib else None}
    children = list(t)
    if children:
        # Group the converted children by tag so repeated tags become lists.
        dd = defaultdict(list)
        for dc in map(etree_to_dict, children):
            for k, v in dc.items():
                dd[k].append(v)
        d = {t.tag: {k: v[0] if len(v) == 1 else v for k, v in dd.items()}}
    if t.attrib:
        d[t.tag].update(('@' + k, v) for k, v in t.attrib.items())
    if t.text:
        text = t.text.strip()
        if children or t.attrib:
            # Mixed content: keep the text next to children/attributes,
            # but drop pure-whitespace indentation.
            if text:
                d[t.tag]['#text'] = text
        else:
            d[t.tag] = text
    return d
280 | |
281 | |
# score comes from feature._parent.get('score') or feature.get('score')

# Directory holding this script (symlinks resolved).  The helper scripts
# blastxml_to_gapped_gff3.py and gff3_rebase.py are looked up relative to it.
INSTALLED_TO = os.path.dirname(os.path.realpath(__file__))
285 | |
286 | |
class JbrowseConnector(object):
    """Populate a JBrowse data directory by driving JBrowse's Perl scripts.

    Every external command runs with ``outdir`` as its working directory, so
    relative paths such as ``data/trackList.json`` resolve inside the output
    folder.  Labels of tracks that asked for name indexing are collected in
    ``tracksToIndex`` and consumed by ``generate_names``.
    """

    def __init__(self, jbrowse, outdir, genomes, standalone=False, gencode=1):
        """Prepare ``outdir`` and load the reference sequences.

        Args:
            jbrowse: folder containing a JBrowse release (its ``bin/`` scripts
                are used).
            outdir: output directory.
            genomes: iterable of FASTA paths passed to prepare-refseqs.pl.
            standalone: when true, copy the whole JBrowse release to ``outdir``.
            gencode: NCBI genetic code id used to look up the codon table.
        """
        # Maps a dataset extension to the flatfile-to-json.pl input flag.
        self.TN_TABLE = {
            'gff3': '--gff',
            'gff': '--gff',
            'bed': '--bed',
            'genbank': '--gbk',
        }

        self.cs = ColorScaling()
        self.jbrowse = jbrowse
        self.outdir = outdir
        self.genome_paths = genomes
        self.standalone = standalone
        self.gencode = gencode
        self.tracksToIndex = []

        if standalone:
            self.clone_jbrowse(self.jbrowse, self.outdir)
        else:
            self._ensure_dir(self.outdir)
            # add_bigwig/add_bam/add_vcf link their inputs into data/raw,
            # which only clone_jbrowse used to create.
            self._ensure_dir(os.path.join(self.outdir, 'data', 'raw'))

        self.process_genomes()
        self.update_gencode()

    @staticmethod
    def _ensure_dir(path):
        """Create ``path`` (and parents); only "already exists" is tolerated."""
        try:
            os.makedirs(path)
        except OSError:
            if not os.path.isdir(path):
                raise

    def update_gencode(self):
        """Write start/stop codons and the codon table into the first track.

        Reads ``data/trackList.json`` under ``outdir``, updates
        ``tracks[0]`` (assumed to be the reference-sequence track created by
        prepare-refseqs.pl -- TODO confirm) and writes the file back.
        """
        table = CodonTable.unambiguous_dna_by_id[int(self.gencode)]
        trackList = os.path.join(self.outdir, 'data', 'trackList.json')
        with open(trackList, 'r') as handle:
            trackListData = json.load(handle)

        trackListData['tracks'][0].update({
            'codonStarts': table.start_codons,
            'codonStops': table.stop_codons,
            'codonTable': table.forward_table,
        })

        with open(trackList, 'w') as handle:
            json.dump(trackListData, handle, indent=2)

    def subprocess_check_call(self, command):
        """Run ``command`` (an argv list) inside ``outdir``; raise on failure."""
        log.debug('cd %s && %s', self.outdir, ' '.join(command))
        subprocess.check_call(command, cwd=self.outdir)

    def _jbrowse_bin(self, command):
        """Return the absolute path of a script in the JBrowse ``bin`` folder."""
        return os.path.realpath(os.path.join(self.jbrowse, 'bin', command))

    def process_genomes(self):
        """Run prepare-refseqs.pl once per reference FASTA."""
        for genome_path in self.genome_paths:
            self.subprocess_check_call([
                'perl', self._jbrowse_bin('prepare-refseqs.pl'),
                '--fasta', genome_path])

    def generate_names(self):
        """Run generate-names.pl over the tracks that requested indexing."""
        # No tracks to index, index only the refseq
        tracks = ','.join(self.tracksToIndex) or 'DNA'

        self.subprocess_check_call([
            'perl', self._jbrowse_bin('generate-names.pl'),
            '--hashBits', '16',
            '--tracks', tracks,
        ])

    def _add_json(self, json_data):
        """Merge ``json_data`` into data/trackList.json via add-json.pl."""
        cmd = [
            'perl', self._jbrowse_bin('add-json.pl'),
            json.dumps(json_data),
            os.path.join('data', 'trackList.json')
        ]
        self.subprocess_check_call(cmd)

    def _add_track_json(self, json_data):
        """Register a track definition via add-track-json.pl.

        The definition is passed through a temporary file, which is removed
        even when the Perl script fails.  Empty definitions are ignored.
        """
        if not json_data:
            return

        # Text mode: json.dumps returns str, which a binary-mode temporary
        # file rejects on Python 3.
        tmp = tempfile.NamedTemporaryFile(mode='w', delete=False)
        try:
            tmp.write(json.dumps(json_data))
            tmp.close()
            cmd = ['perl', self._jbrowse_bin('add-track-json.pl'), tmp.name,
                   os.path.join('data', 'trackList.json')]
            self.subprocess_check_call(cmd)
        finally:
            tmp.close()
            os.unlink(tmp.name)

    def _blastxml_to_gff3(self, xml, min_gap=10):
        """Convert BLAST XML to GFF3; return the path of the temporary result.

        The caller owns the returned file and must delete it.
        """
        gff3_unrebased = tempfile.NamedTemporaryFile(delete=False)
        cmd = ['python', os.path.join(INSTALLED_TO, 'blastxml_to_gapped_gff3.py'),
               '--trim', '--trim_end', '--min_gap', str(min_gap), xml]
        log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_unrebased.name)
        try:
            subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_unrebased)
        except Exception:
            # Nobody will receive the path, so do not leave the file behind.
            gff3_unrebased.close()
            os.unlink(gff3_unrebased.name)
            raise
        gff3_unrebased.close()
        return gff3_unrebased.name

    def _rebase_gff3(self, gff3, blastOpts):
        """Rewrite ``gff3`` in place with gff3_rebase.py and the parent file."""
        gff3_rebased = tempfile.NamedTemporaryFile(delete=False)
        try:
            cmd = ['python', os.path.join(INSTALLED_TO, 'gff3_rebase.py')]
            if blastOpts.get('protein', 'false') == 'true':
                cmd.append('--protein2dna')
            cmd.extend([os.path.realpath(blastOpts['parent']), gff3])
            log.debug('cd %s && %s > %s', self.outdir, ' '.join(cmd), gff3_rebased.name)
            subprocess.check_call(cmd, cwd=self.outdir, stdout=gff3_rebased)
            gff3_rebased.close()

            # Replace original gff3 file
            shutil.copy(gff3_rebased.name, gff3)
        finally:
            gff3_rebased.close()
            os.unlink(gff3_rebased.name)

    def add_blastxml(self, data, trackData, blastOpts, **kwargs):
        """Add a BLAST XML dataset as a CanvasFeatures track.

        ``blastOpts`` must contain ``min_gap``; ``parent`` (a path, or the
        string 'None'), ``protein`` and ``index`` are optional.
        """
        gff3 = self._blastxml_to_gff3(data, min_gap=blastOpts['min_gap'])
        try:
            parent = blastOpts.get('parent')
            if parent and parent != 'None':
                self._rebase_gff3(gff3, blastOpts)

            config = {
                'glyph': 'JBrowse/View/FeatureGlyph/Segments',
                "category": trackData['category'],
            }

            clientConfig = trackData['style']

            cmd = ['perl', self._jbrowse_bin('flatfile-to-json.pl'),
                   '--gff', gff3,
                   '--trackLabel', trackData['label'],
                   '--key', trackData['key'],
                   '--clientConfig', json.dumps(clientConfig),
                   '--config', json.dumps(config),
                   '--trackType', 'JBrowse/View/Track/CanvasFeatures'
                   ]

            self.subprocess_check_call(cmd)
        finally:
            # The intermediate GFF3 is ours to remove, success or not.
            os.unlink(gff3)

        if blastOpts.get('index', 'false') == 'true':
            self.tracksToIndex.append("%s" % trackData['label'])

    def add_bigwig(self, data, trackData, wiggleOpts, **kwargs):
        """Hard-link a bigWig file into data/raw and register its track.

        Note that ``trackData`` is updated in place.
        """
        dest = os.path.join('data', 'raw', trackData['label'] + '.bw')
        self.subprocess_check_call(['ln', data, dest])

        trackData.update({
            "urlTemplate": os.path.join('..', dest),
            "storeClass": "JBrowse/Store/SeqFeature/BigWig",
            # Density is the fallback when no explicit track type is given.
            "type": wiggleOpts.get('type', "JBrowse/View/Track/Wiggle/Density"),
        })
        trackData['variance_band'] = (wiggleOpts['variance_band'] == 'true')

        if 'min' in wiggleOpts and 'max' in wiggleOpts:
            trackData['min_score'] = wiggleOpts['min']
            trackData['max_score'] = wiggleOpts['max']
        else:
            trackData['autoscale'] = wiggleOpts.get('autoscale', 'local')

        self._add_track_json(trackData)

    def add_bam(self, data, trackData, bamOpts, bam_index=None, **kwargs):
        """Symlink a BAM file and its index into data/raw and register it.

        When ``bamOpts['auto_snp']`` is 'true' a second SNPCoverage track is
        added on top of the same file.  ``trackData`` is updated in place.

        Raises:
            ValueError: if ``bam_index`` is not provided.
        """
        if bam_index is None:
            raise ValueError('add_bam requires the path of the BAM index (bam_index)')

        dest = os.path.join('data', 'raw', trackData['label'] + '.bam')
        self.subprocess_check_call(['ln', '-s', os.path.realpath(data), dest])
        self.subprocess_check_call(['ln', '-s', os.path.realpath(bam_index), dest + '.bai'])

        trackData.update({
            "urlTemplate": os.path.join('..', dest),
            "type": "JBrowse/View/Track/Alignments2",
            "storeClass": "JBrowse/Store/SeqFeature/BAM",
        })

        self._add_track_json(trackData)

        if bamOpts.get('auto_snp', 'false') == 'true':
            trackData2 = copy.copy(trackData)
            trackData2.update({
                "type": "JBrowse/View/Track/SNPCoverage",
                "key": trackData['key'] + " - SNPs/Coverage",
                "label": trackData['label'] + "_autosnp",
            })
            self._add_track_json(trackData2)

    def add_vcf(self, data, trackData, vcfOpts=None, **kwargs):
        """Link, bgzip and tabix-index a VCF file, then register its track.

        ``vcfOpts`` is accepted for signature parity with the other
        ``add_*`` methods and is not read.  ``trackData`` is updated in place.
        """
        dest = os.path.join('data', 'raw', trackData['label'] + '.vcf')
        # Absolute target, as in add_bam: a relative symlink target would be
        # resolved against data/raw instead of the caller's directory.
        self.subprocess_check_call(['ln', '-s', os.path.realpath(data), dest])
        self.subprocess_check_call(['bgzip', dest])
        self.subprocess_check_call(['tabix', '-p', 'vcf', dest + '.gz'])

        trackData.update({
            "urlTemplate": os.path.join('..', dest + '.gz'),
            "type": "JBrowse/View/Track/HTMLVariants",
            "storeClass": "JBrowse/Store/SeqFeature/VCFTabix",
        })
        self._add_track_json(trackData)

    def add_features(self, data, format, trackData, gffOpts, **kwargs):
        """Load a GFF/BED/GenBank file through flatfile-to-json.pl.

        Args:
            data: path of the feature file.
            format: dataset extension; unknown values are loaded as GFF.
            trackData: track configuration; must hold ``label``, ``key`` and
                ``style`` (the latter is sent as ``--clientConfig``).
            gffOpts: optional ``match`` (feature type filter), ``trackType``
                and ``index`` settings.
        """
        cmd = [
            'perl', self._jbrowse_bin('flatfile-to-json.pl'),
            # The fallback must be the flag itself; a bare 'gff' would be
            # handed to the Perl script as a stray positional argument.
            self.TN_TABLE.get(format, '--gff'),
            data,
            '--trackLabel', trackData['label'],
            '--key', trackData['key']
        ]

        # Everything except the style goes to --config.
        config = copy.copy(trackData)
        clientConfig = trackData['style']
        del config['style']

        if 'match' in gffOpts:
            config['glyph'] = 'JBrowse/View/FeatureGlyph/Segments'
            cmd += ['--type', gffOpts['match']]

        cmd += ['--clientConfig', json.dumps(clientConfig)]
        cmd += ['--trackType', gffOpts.get('trackType', 'JBrowse/View/Track/CanvasFeatures')]
        cmd += ['--config', json.dumps(config)]

        self.subprocess_check_call(cmd)

        if gffOpts.get('index', 'false') == 'true':
            self.tracksToIndex.append("%s" % trackData['label'])

    def process_annotations(self, track):
        """Add every file of ``track`` to JBrowse, yielding each track label.

        This is a generator: nothing happens until it is iterated.  ``track``
        needs ``style``, ``category``, ``format``, ``trackfiles`` (a list of
        ``(path, extension, human label)`` tuples) and ``conf`` (the parsed
        ``options`` element).
        """
        outputTrackConfig = {
            'style': {
                'label': track['style'].get('label', 'description'),
                'className': track['style'].get('className', 'feature'),
                'description': track['style'].get('description', ''),
            },
            'category': track['category'],
        }
        options = track['conf']['options']

        for i, (dataset_path, dataset_ext, track_human_label) in enumerate(track['trackfiles']):
            log.info('Processing %s / %s', track['category'], track_human_label)
            outputTrackConfig['key'] = track_human_label

            # The label is a digest of path, human label and category plus
            # the file's position.  hashlib needs bytes on Python 3.
            hashData = '|'.join([dataset_path, track_human_label, track['category']])
            if not isinstance(hashData, bytes):
                hashData = hashData.encode('utf-8')
            outputTrackConfig['label'] = hashlib.md5(hashData).hexdigest() + '_%s' % i

            # Colour parsing is complex due to different track types having
            # different colour options.
            colourOptions = self.cs.parse_colours(options, track['format'], gff3=dataset_path)
            # Merge style keys one by one: a plain dict.update() would
            # replace the whole 'style' dict and lose earlier settings.
            for key, value in colourOptions.items():
                if key == 'style':
                    outputTrackConfig['style'].update(value)
                else:
                    outputTrackConfig[key] = value

            outputTrackConfig.update(self.cs.parse_menus(options))

            if dataset_ext in ('gff', 'gff3', 'bed'):
                self.add_features(dataset_path, dataset_ext, outputTrackConfig,
                                  options['gff'])
            elif dataset_ext == 'bigwig':
                self.add_bigwig(dataset_path, outputTrackConfig,
                                options['wiggle'])
            elif dataset_ext == 'bam':
                real_indexes = options['pileup']['bam_indices']['bam_index']
                if not isinstance(real_indexes, list):
                    # <bam_indices>
                    #  <bam_index>/path/to/a.bam.bai</bam_index>
                    # </bam_indices>
                    #
                    # The above will result in the 'bam_index' key containing a
                    # string. If there are two or more indices, the container
                    # becomes a list. Fun!
                    real_indexes = [real_indexes]

                self.add_bam(dataset_path, outputTrackConfig,
                             options['pileup'],
                             bam_index=real_indexes[i])
            elif dataset_ext == 'blastxml':
                self.add_blastxml(dataset_path, outputTrackConfig, options['blast'])
            elif dataset_ext == 'vcf':
                self.add_vcf(dataset_path, outputTrackConfig)

            # Return non-human label for use in other fields
            yield outputTrackConfig['label']

    def add_final_data(self, data):
        """Write visibility lists and general display settings to trackList.json.

        ``data['visibility']`` maps 'default_on', 'always' and 'force' to
        lists of track labels; ``data['general']`` holds the string-valued
        ('true'/'false') display flags, ``trackPadding`` and
        ``aboutDescription``.
        """
        viz_data = {}
        visibility_keys = (
            ('default_on', 'defaultTracks'),
            ('always', 'alwaysOnTracks'),
            ('force', 'forceTracks'),
        )
        for source_key, jbrowse_key in visibility_keys:
            labels = data['visibility'][source_key]
            if labels:
                viz_data[jbrowse_key] = ','.join(labels)

        general = data['general']
        generalData = {}
        if general['aboutDescription'] is not None:
            generalData['aboutThisBrowser'] = {'description': general['aboutDescription'].strip()}

        generalData['view'] = {
            'trackPadding': general['trackPadding']
        }
        # The flags arrive as the strings 'true'/'false'.
        for flag in ('shareLink', 'show_tracklist', 'show_nav',
                     'show_overview', 'show_menu', 'hideGenomeOptions'):
            generalData[flag] = (general[flag] == 'true')

        viz_data.update(generalData)
        self._add_json(viz_data)

    def clone_jbrowse(self, jbrowse_dir, destination):
        """Clone a JBrowse directory into a destination directory.

        Also creates ``data/raw`` below the destination and deletes broken
        symlinks from the copy.
        """
        # JBrowse seems to have included some bad symlinks, cp ignores bad symlinks
        # unlike copytree
        cmd = ['cp', '-r', os.path.join(jbrowse_dir, '.'), destination]
        log.debug(' '.join(cmd))
        subprocess.check_call(cmd)
        cmd = ['mkdir', '-p', os.path.join(destination, 'data', 'raw')]
        log.debug(' '.join(cmd))
        subprocess.check_call(cmd)

        # JBrowse releases come with some broken symlinks.  They are removed
        # here in Python: the former `find ... -exec rm '{}' +` call passed
        # the quotes literally (no shell strips them) and needed GNU find.
        for dirpath, dirnames, filenames in os.walk(destination):
            for name in dirnames + filenames:
                candidate = os.path.join(dirpath, name)
                if os.path.islink(candidate) and not os.path.exists(candidate):
                    log.debug('Removing broken symlink %s', candidate)
                    os.unlink(candidate)
639 | |
640 | |
if __name__ == '__main__':
    # Command-line entry point: read the track configuration XML, build the
    # JBrowse data directory and register every configured track.
    parser = argparse.ArgumentParser(description="", epilog="")
    # argparse.FileType replaces the Python-2-only ``file`` builtin.
    parser.add_argument('xml', type=argparse.FileType('r'), help='Track Configuration')

    parser.add_argument('--jbrowse', help='Folder containing a jbrowse release')
    parser.add_argument('--outdir', help='Output directory', default='out')
    parser.add_argument('--standalone', help='Standalone mode includes a copy of JBrowse', action='store_true')
    args = parser.parse_args()

    # Parse straight from the handle argparse opened, then release it.
    tree = ET.parse(args.xml)
    args.xml.close()
    root = tree.getroot()

    jc = JbrowseConnector(
        jbrowse=args.jbrowse,
        outdir=args.outdir,
        genomes=[os.path.realpath(x.text) for x in root.findall('metadata/genomes/genome')],
        standalone=args.standalone,
        gencode=root.find('metadata/gencode').text
    )

    general = 'metadata/general/'
    extra_data = {
        # Track labels grouped by the visibility requested in the XML.
        'visibility': {
            'default_on': [],
            'default_off': [],
            'force': [],
            'always': [],
        },
        'general': {
            'defaultLocation': root.find(general + 'defaultLocation').text,
            'trackPadding': int(root.find(general + 'trackPadding').text),
            'shareLink': root.find(general + 'shareLink').text,
            'aboutDescription': root.find(general + 'aboutDescription').text,
            'show_tracklist': root.find(general + 'show_tracklist').text,
            'show_nav': root.find(general + 'show_nav').text,
            'show_overview': root.find(general + 'show_overview').text,
            'show_menu': root.find(general + 'show_menu').text,
            'hideGenomeOptions': root.find(general + 'hideGenomeOptions').text,
        }
    }
    for track in root.findall('tracks/track'):
        track_conf = {}
        track_conf['trackfiles'] = [
            (os.path.realpath(x.attrib['path']), x.attrib['ext'], x.attrib['label'])
            for x in track.findall('files/trackFile')
        ]

        track_conf['category'] = track.attrib['cat']
        track_conf['format'] = track.attrib['format']

        # Only pertains to gff3 + blastxml. TODO?
        style_node = track.find('options/style')
        if style_node is None:
            track_conf['style'] = {}
        else:
            track_conf['style'] = {t.tag: t.text for t in style_node}

        track_conf['conf'] = etree_to_dict(track.find('options'))

        # process_annotations is a generator; iterating it does the work.
        visibility = track.attrib.get('visibility', 'default_off')
        for key in jc.process_annotations(track_conf):
            extra_data['visibility'][visibility].append(key)

    jc.add_final_data(extra_data)
    jc.generate_names()