Mercurial > repos > george-weingart > graphlan_import
comparison export2graphlan.py @ 0:cac6247cb1d3 draft
graphlan_import
author | george-weingart |
---|---|
date | Tue, 26 Aug 2014 14:51:29 -0400 |
parents | |
children | 82fb838d02dc |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cac6247cb1d3 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 from argparse import ArgumentParser | |
4 from colorsys import hsv_to_rgb | |
5 from math import log | |
6 from StringIO import StringIO | |
7 from re import compile | |
8 from hclust2.hclust2 import DataMatrix | |
9 | |
10 | |
11 __author__ = 'Francesco Asnicar' | |
12 __email__ = "francesco.asnicar@gmail.com" | |
13 __version__ = '0.13' | |
14 __date__ = '29th July 2014' | |
15 | |
16 | |
17 def scale_color((h, s, v), factor=1.): | |
18 """ | |
19 Takes as input a tuple that represents a color in HSV format, and optionally a scale factor. | |
20 Return an RGB string that is the converted HSV color, scaled by the given factor. | |
21 """ | |
22 if (h < 0.) or (h > 360.): | |
23 raise Exception('[scale_color()] Hue value out of range (0, 360): ' + str(h)) | |
24 | |
25 if (s < 0.) or (s > 100.): | |
26 raise Exception('[scale_color()] Saturation value out of range (0, 100): ' + str(s)) | |
27 | |
28 if (v < 0.) or (v > 100.): | |
29 raise Exception('[scale_color()] Value value out of range (0, 100): ' + str(v)) | |
30 | |
31 if (factor < 0.) or (factor > 1.): | |
32 raise Exception('[scale_color()] Factor value out of range (0.0, 1.0): ' + str(factor)) | |
33 | |
34 v *= factor | |
35 r, g, b = hsv_to_rgb(h/360., s/100., v/100.) | |
36 | |
37 return '#{0:02x}{1:02x}{2:02x}'.format(int(round(r*255.)), int(round(g*255.)), int(round(b*255.))) | |
38 | |
39 | |
40 def read_params(): | |
41 """ | |
42 Parse the input parameters, performing some validity check. | |
43 Return the parsed arguments. | |
44 """ | |
45 parser = ArgumentParser(description="export2graphlan.py (ver. " + __version__ + | |
46 " of " + __date__ + "). Convert MetaPhlAn, LEfSe, and/or HUMAnN output to GraPhlAn input format. Authors: " | |
47 + __author__ + " (" + __email__ + ")") | |
48 | |
49 # input parameters group | |
50 group = parser.add_argument_group(title='input parameters', | |
51 description="You need to provide at least one of the two arguments") | |
52 group.add_argument('-i', '--lefse_input', | |
53 type=str, | |
54 required=False, | |
55 help="LEfSe input data") | |
56 group.add_argument('-o', '--lefse_output', | |
57 type=str, | |
58 required=False, | |
59 help="LEfSe output result data") | |
60 | |
61 # output parameters group | |
62 group = parser.add_argument_group(title='output parameters') | |
63 group.add_argument('-t', '--tree', | |
64 type=str, | |
65 required=True, | |
66 help="Output filename where save the input tree for GraPhlAn") | |
67 group.add_argument('-a', '--annotation', | |
68 type=str, | |
69 required=True, | |
70 help="Output filename where save GraPhlAn annotation") | |
71 | |
72 # annotations | |
73 parser.add_argument('--annotations', | |
74 default=None, | |
75 type=str, | |
76 required=False, | |
77 help="List which levels should be annotated in the tree. Use a comma separate values form, e.g., --annotation_levels 1,2,3. Default is None") | |
78 parser.add_argument('--external_annotations', | |
79 default=None, | |
80 type=str, | |
81 required=False, | |
82 help="List which levels should use the external legend for the annotation. Use a comma separate values form, e.g., --annotation_levels 1,2,3. Default is None") | |
83 # shaded background | |
84 parser.add_argument('--background_levels', | |
85 default=None, | |
86 type=str, | |
87 required=False, | |
88 help="List which levels should be highlight with a shaded background. Use a comma separate values form, e.g., --background_levels 1,2,3") | |
89 parser.add_argument('--background_clades', | |
90 default=None, | |
91 type=str, | |
92 required=False, | |
93 help="Specify the clades that should be highlight with a shaded background. Use a comma separate values form and surround the string with \" if it contains spaces. Example: --background_clades \"Bacteria.Actinobacteria, Bacteria.Bacteroidetes.Bacteroidia, Bacteria.Firmicutes.Clostridia.Clostridiales\"") | |
94 parser.add_argument('--background_colors', | |
95 default=None, | |
96 type=str, | |
97 required=False, | |
98 help="Set the color to use for the shaded background. Colors can be either in RGB or HSV (using a semi-colon to separate values, surrounded with ()) format. Use a comma separate values form and surround the string with \" if it contains spaces. Example: --background_colors \"#29cc36, (150; 100; 100), (280; 80; 88)\"") | |
99 # title | |
100 parser.add_argument('--title', | |
101 type=str, | |
102 required=False, | |
103 help="If specified set the title of the GraPhlAn plot. Surround the string with \" if it contains spaces, e.g., --title \"Title example\"") | |
104 # title font size | |
105 parser.add_argument('--title_font_size', | |
106 default=15, | |
107 type=int, | |
108 required=False, | |
109 help="Set the title font size. Default is 15") | |
110 # clade size | |
111 parser.add_argument('--def_clade_size', | |
112 default=10., | |
113 type=float, | |
114 required=False, | |
115 help="Set a default size for clades that are not found as biomarkers by LEfSe. Default is 10") | |
116 parser.add_argument('--min_clade_size', | |
117 default=20., | |
118 type=float, | |
119 required=False, | |
120 help="Set the minimum value of clades that are biomarkers. Default is 20") | |
121 parser.add_argument('--max_clade_size', | |
122 default=200., | |
123 type=float, | |
124 required=False, | |
125 help="Set the maximum value of clades that are biomarkers. Default is 200") | |
126 # font size | |
127 parser.add_argument('--def_font_size', | |
128 default=10, | |
129 type=int, | |
130 required=False, | |
131 help="Set a default font size. Default is 10") | |
132 parser.add_argument('--min_font_size', | |
133 default=8, | |
134 type=int, | |
135 required=False, | |
136 help="Set the minimum font size to use. Default is 8") | |
137 parser.add_argument('--max_font_size', | |
138 default=12, | |
139 type=int, | |
140 required=False, | |
141 help="Set the maximum font size. Default is 12") | |
142 # legend font size | |
143 parser.add_argument('--annotation_legend_font_size', | |
144 default=10, | |
145 type=int, | |
146 required=False, | |
147 help="Set the font size for the annotation legend. Default is 10") | |
148 # abundance threshold | |
149 parser.add_argument('--abundance_threshold', | |
150 default=20., | |
151 type=float, | |
152 required=False, | |
153 help="Set the minimun abundace value for a clade to be annotated. Default is 20.0") | |
154 # ONLY lefse_input provided | |
155 parser.add_argument('--most_abundant', | |
156 default=10, | |
157 type=int, | |
158 required=False, | |
159 help="When only lefse_input is provided, you can specify how many clades highlight. Since the biomarkers are missing, they will be chosen from the most abundant") | |
160 parser.add_argument('--least_biomarkers', | |
161 default=3, | |
162 type=int, | |
163 required=False, | |
164 help="When only lefse_input is provided, you can specify the minimum number of biomarkers to extract. The taxonomy is parsed, and the level is choosen in order to have at least the specified number of biomarkers") | |
165 # decide to keep the OTU id or to merger at the above taxonomic level | |
166 parser.add_argument('--discard_otus', | |
167 default=True, | |
168 action='store_false', | |
169 help="If specified the OTU ids will be discarde from the taxonmy. Default behavior keep OTU ids in taxonomy") | |
170 | |
171 DataMatrix.input_parameters(parser) | |
172 args = parser.parse_args() | |
173 | |
174 # check if at least one of the input params is given | |
175 if (not args.lefse_input) and (not args.lefse_output) : | |
176 raise Exception("[read_params()] You must provide at least one of the two input parameters: ") | |
177 | |
178 # check that min_clade_size is less than max_clade_size | |
179 if args.min_clade_size > args.max_clade_size : | |
180 print "[W] min_clade_size cannot be greater than max_clade_size, assigning their default values" | |
181 args.min_clade_size = 20. | |
182 args.max_clade_size = 200. | |
183 | |
184 # check that min_font_size is less than max_font_size | |
185 if args.min_font_size > args.max_font_size : | |
186 print "[W] min_font_size cannot be greater than max_font_size, assigning their default values" | |
187 args.min_font_size = 8 | |
188 args.max_font_size = 12 | |
189 | |
190 return args | |
191 | |
192 | |
193 def get_file_type(filename): | |
194 """ | |
195 Return the extension (if any) of the ``filename`` in lower case. | |
196 """ | |
197 return filename[filename.rfind('.')+1:].lower() | |
198 | |
199 | |
200 def parse_biom(filename, keep_otus=True): | |
201 """ | |
202 Load a biom table and extract the taxonomy (from metadata), removing the unuseful header. | |
203 Return the input biom in tab-separated format. | |
204 """ | |
205 from biom import load_table # avoid to ask for the BIOM library if there is no biom file | |
206 | |
207 biom_table = load_table(filename) | |
208 strs = biom_table.delimited_self(header_value='TAXA', header_key='taxonomy') | |
209 lst1 = [str(s) for s in strs.split('\n')[1:]] # skip the "# Constructed from biom file" entry | |
210 biom_file = [] | |
211 out = [lst1[0]] # save the header | |
212 pre_taxa = compile(".__") | |
213 classs = compile("\(class\)") | |
214 | |
215 # consistency check | |
216 i = 0 | |
217 while i < (len(lst1)-1): | |
218 if len([s for s in lst1[i].split('\t')]) != len([s for s in lst1[i+1].split('\t')]) : | |
219 raise Exception('[parse_biom()] It seems that taxonomic metadata are missing, maybe is the wrong biom file?') | |
220 | |
221 i += 1 | |
222 | |
223 for l in lst1[1:]: | |
224 otu = None | |
225 lst = [str(s).strip() for s in l.split('\t')[1:-1]] | |
226 | |
227 if keep_otus: | |
228 otu = l.split('\t')[0] | |
229 | |
230 # Clean an move taxa in first place | |
231 taxa = '.'.join([s.strip().replace('[', '').replace('u\'', '').replace(']', '').replace(' ', '').replace('\'', '') for s in l.split('\t')[-1].split(',')]) | |
232 taxa = pre_taxa.sub('', taxa) # remove '{k|p|o|g|s}__' | |
233 taxa = classs.sub('', taxa) # remove '(class)' | |
234 taxa = taxa.rstrip('.') # remove trailing dots | |
235 | |
236 if otu: | |
237 taxa = taxa + '.' + otu | |
238 | |
239 biom_file.append([taxa] + lst) | |
240 | |
241 # merge such rows that have the same taxa | |
242 i = 1 | |
243 dic = {} | |
244 | |
245 for l in biom_file[i:]: | |
246 for k in biom_file[i+1:]: | |
247 if l[0] == k[0]: | |
248 lst = [] | |
249 j = 1 | |
250 | |
251 while j < len(l): | |
252 lst.append(float(l[j]) + float(k[j])) | |
253 j += 1 | |
254 | |
255 if l[0] in dic: | |
256 lst1 = dic[l[0]] | |
257 j = 0 | |
258 lst2 = [] | |
259 | |
260 while j < len(lst1): | |
261 lst2.append(float(lst1[j]) + float(lst[j])) | |
262 j += 1 | |
263 | |
264 lst = lst2 | |
265 | |
266 dic[l[0]] = lst | |
267 # if not in dic, add it! | |
268 if l[0] not in dic: | |
269 dic[l[0]] = l[1:] | |
270 | |
271 i += 1 | |
272 | |
273 for k in dic: | |
274 out.append('\t'.join([str(s) for s in [k] + dic[k]])) | |
275 | |
276 return '\n'.join(out) | |
277 | |
278 | |
279 def get_most_abundant(abundances, xxx): | |
280 """ | |
281 Sort by the abundance level all the taxonomy that represent at least two levels. | |
282 Return the first ``xxx`` most abundant. | |
283 """ | |
284 abundant = [] | |
285 | |
286 for a in abundances : | |
287 if a.count('|') > 0: | |
288 abundant.append((float(abundances[a]), a.replace('|', '.'))) | |
289 elif a.count('.') > 0: | |
290 abundant.append((float(abundances[a]), a)) | |
291 | |
292 abundant.sort(reverse=True) | |
293 return abundant[:xxx] | |
294 | |
295 | |
296 def get_biomarkes(abundant, xxx): | |
297 """ | |
298 Split the taxonomy and then look, level by level, when there are at least ``xxx`` distinct branches. | |
299 Return the set of branches as biomarkers to highlight. | |
300 """ | |
301 cc = [] | |
302 bk = set() | |
303 lvl = 0 | |
304 | |
305 for _, t in abundant: | |
306 cc.append(t.split('.')) | |
307 | |
308 while lvl < len(max(cc)): | |
309 bk = set() | |
310 | |
311 for c in cc: | |
312 if lvl < len(c): | |
313 bk |= set([c[lvl]]) | |
314 | |
315 if len(bk) >= xxx: | |
316 break | |
317 | |
318 lvl += 1 | |
319 | |
320 return bk | |
321 | |
322 def scale_clade_size(minn, maxx, abu, max_abu): | |
323 """ | |
324 Return the value of ``abu`` scaled to ``max_abu`` logarithmically, and then map from ``minn`` to ``maxx``. | |
325 """ | |
326 return minn + maxx*log(1. + (abu/max_abu)) | |
327 | |
328 | |
329 def main(): | |
330 """ | |
331 """ | |
332 colors = [(245., 90., 100.), (125., 80., 80.), (0., 80., 100.), (195., 100., 100.), (150., 100., 100.), (55., 100., 100.), (280., 80., 88.)] # HSV format | |
333 args = read_params() | |
334 lefse_input = None | |
335 lefse_output = {} | |
336 color = {} | |
337 biomarkers = set() | |
338 taxa = [] | |
339 abundances = {} | |
340 max_abundances = None | |
341 max_effect_size = None | |
342 max_log_effect_size = None | |
343 background_list = [] | |
344 background_clades = [] | |
345 background_colors = {} | |
346 annotations_list = [] | |
347 external_annotations_list = [] | |
348 lin = False | |
349 lout = False | |
350 | |
351 # get the levels that should be shaded | |
352 if args.background_levels : | |
353 background_list = [int(i.strip()) for i in args.background_levels.strip().split(',')] | |
354 | |
355 # get the background_clades | |
356 if args.background_clades : | |
357 if get_file_type(args.background_colors) in ['txt'] : | |
358 with open(args.background_clades, 'r') as f: | |
359 background_clades = [str(s.strip()) for s in f] | |
360 else : # it's a string in csv format | |
361 background_clades = [str(s.strip()) for s in args.background_clades.split(',')] | |
362 | |
363 # read the set of colors to use for the background_clades | |
364 if args.background_colors : | |
365 col = [] | |
366 | |
367 if get_file_type(args.background_colors) in ['txt'] : | |
368 with open(args.background_colors, 'r') as f : | |
369 col = [str(s.strip()) for s in f] | |
370 else : # it's a string in csv format | |
371 col = [c.strip() for c in args.background_colors.split(',')] | |
372 | |
373 lst = {} | |
374 i = 0 | |
375 | |
376 for c in background_clades : | |
377 cc = c[:c.find('.')] | |
378 | |
379 if cc not in lst : | |
380 background_colors[c] = col[i % len(col)] | |
381 lst[cc] = col[i % len(col)] | |
382 i += 1 | |
383 else : | |
384 background_colors[c] = lst[cc] | |
385 | |
386 # get the levels that will use the internal annotation | |
387 if args.annotations : | |
388 annotations_list = [int(i.strip()) for i in args.annotations.strip().split(',')] | |
389 | |
390 # get the levels that will use the external legend annotation | |
391 if args.external_annotations : | |
392 external_annotations_list = [int(i.strip()) for i in args.external_annotations.strip().split(',')] | |
393 | |
394 if args.lefse_input : | |
395 # if the lefse_input is in biom format, convert it | |
396 if get_file_type(args.lefse_input) in 'biom' : | |
397 try : | |
398 biom = parse_biom(args.lefse_input, args.discard_otus) | |
399 lefse_input = DataMatrix(StringIO(biom), args) | |
400 except Exception as e : | |
401 lin = True | |
402 print e | |
403 else : | |
404 lefse_input = DataMatrix(args.lefse_input, args) | |
405 | |
406 if not lin : | |
407 taxa = [t.replace('|', '.') for t in lefse_input.get_fnames()] # build taxonomy list | |
408 abundances = dict(lefse_input.get_averages()) | |
409 max_abundances = max([abundances[x] for x in abundances]) | |
410 else : # no lefse_input provided | |
411 lin = True | |
412 | |
413 if args.lefse_output : | |
414 # if the lefse_output is in biom format... I don't think it's possible! | |
415 if get_file_type(args.lefse_output) in 'biom' : | |
416 lout = True | |
417 print "Seriously?? LEfSe output file is not expected to be in biom format!" | |
418 else : | |
419 lst = [] | |
420 | |
421 with open(args.lefse_output, 'r') as out_file : | |
422 for line in out_file : | |
423 t, m, bk, es, pv = line.strip().split('\t') | |
424 lefse_output[t] = (es, bk, m, pv) | |
425 | |
426 # get distinct biomarkers | |
427 if bk : | |
428 biomarkers |= set([bk]) | |
429 | |
430 # get all effect size | |
431 if es : | |
432 lst.append(float(es)) | |
433 | |
434 max_effect_size = max(lst) | |
435 | |
436 # no lefse_input file provided! | |
437 if (not taxa) and (not abundances) : # build taxonomy list and abundaces map | |
438 for t in lefse_output : | |
439 _, _, m, _ = lefse_output[t] | |
440 abundances[t.replace('.', '|')] = float(m) | |
441 | |
442 max_abundances = max([abundances[x] for x in abundances]) | |
443 | |
444 for t in lefse_output : | |
445 scaled = scale_clade_size(args.min_clade_size, args.max_clade_size, abundances[t.replace('.', '|')], max_abundances) | |
446 | |
447 if scaled >= args.abundance_threshold : | |
448 taxa.append(t) | |
449 elif not lin : # no lefse_output provided and lefse_input correctly red | |
450 lout = True | |
451 | |
452 # find the xxx most abundant | |
453 abundant = get_most_abundant(abundances, args.most_abundant) | |
454 | |
455 # find the taxonomy level with at least yyy distinct childs from the xxx most abundant | |
456 biomarkers = get_biomarkes(abundant, args.least_biomarkers) | |
457 | |
458 # compose lefse_output variable | |
459 for _, t in abundant : | |
460 b = '' | |
461 | |
462 for bk in biomarkers : | |
463 if bk in t : | |
464 b = bk | |
465 | |
466 lefse_output[t] = (2., b, '', '') | |
467 | |
468 max_effect_size = 1. # It's not gonna working | |
469 | |
470 | |
471 # no lefse_output and no lefse_input provided | |
472 if lin and lout : | |
473 print "You must provide at least one input file!" | |
474 exit(1) | |
475 | |
476 # write the tree | |
477 with open(args.tree, 'w') as tree_file : | |
478 for taxonomy in taxa : | |
479 tree_file.write(''.join([taxonomy, '\n'])) | |
480 | |
481 # for each biomarker assign it to a different color | |
482 i = 0 | |
483 | |
484 for bk in biomarkers : | |
485 color[bk] = i % len(colors) | |
486 i += 1 | |
487 | |
488 # find max log abs value of effect size | |
489 if lefse_output : | |
490 lst = [] | |
491 | |
492 for t in lefse_output : | |
493 es, _, _, _ = lefse_output[t] | |
494 | |
495 if es : | |
496 lst.append(abs(log(float(es) / max_effect_size))) | |
497 | |
498 max_log_effect_size = max(lst) | |
499 | |
500 # write the annotation | |
501 try : | |
502 with open(args.annotation, 'w') as annot_file : | |
503 # set the title | |
504 if args.title : | |
505 annot_file.write(''.join(['\t'.join(['title', args.title]), '\n'])) | |
506 annot_file.write(''.join(['\t'.join(['title_font_size', str(args.title_font_size)]), '\n'])) | |
507 | |
508 # write some basic customizations | |
509 annot_file.write(''.join(['\t'.join(['clade_separation', '0.5']), '\n'])) | |
510 annot_file.write(''.join(['\t'.join(['branch_bracket_depth', '0.8']), '\n'])) | |
511 annot_file.write(''.join(['\t'.join(['branch_bracket_width', '0.2']), '\n'])) | |
512 annot_file.write(''.join(['\t'.join(['annotation_legend_font_size', str(args.annotation_legend_font_size)]), '\n'])) | |
513 annot_file.write(''.join(['\t'.join(['class_legend_font_size', '10']), '\n'])) | |
514 annot_file.write(''.join(['\t'.join(['class_legend_marker_size', '1.5']), '\n'])) | |
515 | |
516 # write the biomarkers' legend | |
517 for bk in biomarkers : | |
518 biom = bk.replace('_', ' ').upper() | |
519 rgb = scale_color(colors[color[bk]]) | |
520 annot_file.write(''.join(['\t'.join([biom, 'annotation', biom]), '\n'])) | |
521 annot_file.write(''.join(['\t'.join([biom, 'clade_marker_color', rgb]), '\n'])) | |
522 annot_file.write(''.join(['\t'.join([biom, 'clade_marker_size', '40']), '\n'])) | |
523 | |
524 done_clades = [] | |
525 | |
526 # write the annotation for the tree | |
527 for taxonomy in taxa : | |
528 level = taxonomy.count('.') + 1 # which level is this taxonomy? | |
529 clean_taxonomy = taxonomy[taxonomy.rfind('.') + 1:] # retrieve the last level in taxonomy | |
530 scaled = args.def_clade_size | |
531 | |
532 # scaled the size of the clade by the average abundance | |
533 if taxonomy.replace('.', '|') in abundances : | |
534 scaled = scale_clade_size(args.min_clade_size, args.max_clade_size, abundances[taxonomy.replace('.', '|')], max_abundances) | |
535 | |
536 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'clade_marker_size', str(scaled)]), '\n'])) | |
537 | |
538 # put a bakcground annotation to the levels specified by the user | |
539 shaded_background = [] | |
540 | |
541 for l in background_list : | |
542 if level >= l : | |
543 lst = [s.strip() for s in taxonomy.strip().split('.')] | |
544 t = '.'.join(lst[:l]) | |
545 | |
546 if t not in shaded_background : | |
547 shaded_background.append(t) | |
548 | |
549 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / l) | |
550 | |
551 annot_file.write(''.join(['\t'.join([t, 'annotation_background_color', args.background_color]), '\n'])) | |
552 annot_file.write(''.join(['\t'.join([t, 'annotation', '*']), '\n'])) | |
553 annot_file.write(''.join(['\t'.join([t, 'annotation_font_size', str(font_size)]), '\n'])) | |
554 | |
555 # put a bakcground annotation to the clades specified by the user | |
556 for c in background_colors : | |
557 bg_color = background_colors[c] | |
558 | |
559 if not bg_color.startswith('#') : | |
560 bg_color = bg_color.replace('(', '').replace(')', '') | |
561 h, s, v = bg_color.split(';') | |
562 bg_color = scale_color((float(h.strip()) , float(s.strip()), float(v.strip()))) | |
563 | |
564 # check if the taxonomy has more than one level | |
565 lvls = [str(cc.strip()) for cc in c.split('.')] | |
566 | |
567 for l in lvls : | |
568 if (l in taxonomy) and (l not in done_clades) : | |
569 lvl = taxonomy[:taxonomy.index(l)].count('.') + 1 | |
570 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / lvl) | |
571 | |
572 | |
573 annot_file.write(''.join(['\t'.join([l, 'annotation_background_color', bg_color]), '\n'])) | |
574 annot_file.write(''.join(['\t'.join([l, 'annotation', '*']), '\n'])) | |
575 annot_file.write(''.join(['\t'.join([l, 'annotation_font_size', str(font_size)]), '\n'])) | |
576 | |
577 done_clades.append(l) | |
578 | |
579 if lefse_output : | |
580 if taxonomy in lefse_output : | |
581 es, bk, _, _ = lefse_output[taxonomy] | |
582 | |
583 # if it is a biomarker then color and label it! | |
584 if bk : | |
585 fac = abs(log(float(es) / max_effect_size)) / max_log_effect_size | |
586 | |
587 try : | |
588 rgbs = scale_color(colors[color[bk]], fac) | |
589 except Exception as e : | |
590 print e | |
591 print ' '.join(["[W] Assign to", taxonomy, "the default color:", colors[color[bk]]]) | |
592 rgbs = colors[color[bk]] | |
593 | |
594 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'clade_marker_color', rgbs]), '\n'])) | |
595 | |
596 # write the annotation only if the abundance is above a given threshold and it is either internal or external annotation lists | |
597 if (scaled >= args.abundance_threshold) and ((level in annotations_list) or (level in external_annotations_list)) : | |
598 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / level) | |
599 annotation = '*' if level in annotations_list else '*:*' | |
600 | |
601 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'annotation_background_color', rgbs]), '\n'])) | |
602 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'annotation', annotation]), '\n'])) | |
603 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'annotation_font_size', str(font_size)]), '\n'])) | |
604 except Exception as e : | |
605 print e | |
606 | |
607 | |
608 if __name__ == '__main__' : | |
609 main() |