Mercurial > repos > george-weingart > graphlan_import
comparison export2graphlan.py @ 28:82fb838d02dc draft
Uploaded updated version of the programs
author | george-weingart |
---|---|
date | Fri, 05 Sep 2014 14:00:09 -0400 |
parents | cac6247cb1d3 |
children |
comparison
equal
deleted
inserted
replaced
27:252c242518f2 | 28:82fb838d02dc |
---|---|
8 from hclust2.hclust2 import DataMatrix | 8 from hclust2.hclust2 import DataMatrix |
9 | 9 |
10 | 10 |
11 __author__ = 'Francesco Asnicar' | 11 __author__ = 'Francesco Asnicar' |
12 __email__ = "francesco.asnicar@gmail.com" | 12 __email__ = "francesco.asnicar@gmail.com" |
13 __version__ = '0.13' | 13 __version__ = '0.17' |
14 __date__ = '29th July 2014' | 14 __date__ = '21th August 2014' |
15 | 15 |
16 | 16 |
17 def scale_color((h, s, v), factor=1.): | 17 def scale_color((h, s, v), factor=1.): |
18 """ | 18 """ |
19 Takes as input a tuple that represents a color in HSV format, and optionally a scale factor. | 19 Takes as input a tuple that represents a color in HSV format, and optionally a scale factor. |
20 Return an RGB string that is the converted HSV color, scaled by the given factor. | 20 Return an RGB string that is the converted HSV color, scaled by the given factor. |
21 """ | 21 """ |
22 if (h < 0.) or (h > 360.): | 22 if (h < 0.) or (h > 360.): |
23 raise Exception('[scale_color()] Hue value out of range (0, 360): ' + str(h)) | 23 raise Exception('[scale_color()] Hue value out of range (0, 360): ' + str(h)) |
24 | 24 |
25 if (s < 0.) or (s > 100.): | 25 if (s < 0.) or (s > 100.): |
26 raise Exception('[scale_color()] Saturation value out of range (0, 100): ' + str(s)) | 26 raise Exception('[scale_color()] Saturation value out of range (0, 100): ' + str(s)) |
27 | 27 |
28 if (v < 0.) or (v > 100.): | 28 if (v < 0.) or (v > 100.): |
29 raise Exception('[scale_color()] Value value out of range (0, 100): ' + str(v)) | 29 raise Exception('[scale_color()] Value value out of range (0, 100): ' + str(v)) |
30 | 30 |
31 if (factor < 0.) or (factor > 1.): | 31 if (factor < 0.) or (factor > 1.): |
32 raise Exception('[scale_color()] Factor value out of range (0.0, 1.0): ' + str(factor)) | 32 raise Exception('[scale_color()] Factor value out of range (0.0, 1.0): ' + str(factor)) |
33 | 33 |
34 v *= factor | 34 v *= factor |
35 r, g, b = hsv_to_rgb(h/360., s/100., v/100.) | 35 r, g, b = hsv_to_rgb(h/360., s/100., v/100.) |
36 | 36 |
37 return '#{0:02x}{1:02x}{2:02x}'.format(int(round(r*255.)), int(round(g*255.)), int(round(b*255.))) | 37 return '#{0:02x}{1:02x}{2:02x}'.format(int(round(r*255.)), int(round(g*255.)), int(round(b*255.))) |
38 | 38 |
39 | 39 |
40 def read_params(): | 40 def read_params(): |
41 """ | 41 """ |
42 Parse the input parameters, performing some validity check. | 42 Parse the input parameters, performing some validity check. |
43 Return the parsed arguments. | 43 Return the parsed arguments. |
44 """ | 44 """ |
45 parser = ArgumentParser(description="export2graphlan.py (ver. " + __version__ + | 45 parser = ArgumentParser(description="export2graphlan.py (ver. " + __version__ + |
46 " of " + __date__ + "). Convert MetaPhlAn, LEfSe, and/or HUMAnN output to GraPhlAn input format. Authors: " | 46 " of " + __date__ + "). Convert MetaPhlAn, LEfSe, and/or HUMAnN output to GraPhlAn input format. Authors: " |
47 + __author__ + " (" + __email__ + ")") | 47 + __author__ + " (" + __email__ + ")") |
48 | 48 |
49 # input parameters group | 49 # input parameters group |
50 group = parser.add_argument_group(title='input parameters', | 50 group = parser.add_argument_group(title='input parameters', |
51 description="You need to provide at least one of the two arguments") | 51 description="You need to provide at least one of the two arguments") |
52 group.add_argument('-i', '--lefse_input', | 52 group.add_argument('-i', '--lefse_input', |
53 type=str, | 53 type=str, |
54 required=False, | 54 required=False, |
55 help="LEfSe input data") | 55 help="LEfSe input data. A file that can be given to LEfSe for biomarkers analysis. It can be the result of a " |
56 group.add_argument('-o', '--lefse_output', | 56 "MetaPhlAn or HUMAnN analysis") |
57 type=str, | 57 group.add_argument('-o', '--lefse_output', |
58 required=False, | 58 type=str, |
59 help="LEfSe output result data") | 59 required=False, |
60 | 60 help="LEfSe output result data. The result of LEfSe analysis performed on the lefse_input file") |
61 # output parameters group | 61 |
62 group = parser.add_argument_group(title='output parameters') | 62 # output parameters group |
63 group.add_argument('-t', '--tree', | 63 group = parser.add_argument_group(title='output parameters') |
64 type=str, | 64 group.add_argument('-t', '--tree', |
65 required=True, | 65 type=str, |
66 help="Output filename where save the input tree for GraPhlAn") | 66 required=True, |
67 group.add_argument('-a', '--annotation', | 67 help="Output filename where save the input tree for GraPhlAn") |
68 type=str, | 68 group.add_argument('-a', '--annotation', |
69 required=True, | 69 type=str, |
70 help="Output filename where save GraPhlAn annotation") | 70 required=True, |
71 | 71 help="Output filename where save GraPhlAn annotation") |
72 # annotations | 72 |
73 parser.add_argument('--annotations', | 73 # annotations |
74 default=None, | 74 parser.add_argument('--annotations', |
75 type=str, | 75 default=None, |
76 required=False, | 76 type=str, |
77 help="List which levels should be annotated in the tree. Use a comma separate values form, e.g., --annotation_levels 1,2,3. Default is None") | 77 required=False, |
78 parser.add_argument('--external_annotations', | 78 help="List which levels should be annotated in the tree. Use a comma separate values form, e.g., " |
79 default=None, | 79 "--annotation_levels 1,2,3. Default is None") |
80 type=str, | 80 parser.add_argument('--external_annotations', |
81 required=False, | 81 default=None, |
82 help="List which levels should use the external legend for the annotation. Use a comma separate values form, e.g., --annotation_levels 1,2,3. Default is None") | 82 type=str, |
83 # shaded background | 83 required=False, |
84 parser.add_argument('--background_levels', | 84 help="List which levels should use the external legend for the annotation. Use a comma separate values form, " |
85 default=None, | 85 "e.g., --annotation_levels 1,2,3. Default is None") |
86 type=str, | 86 # shaded background |
87 required=False, | 87 parser.add_argument('--background_levels', |
88 help="List which levels should be highlight with a shaded background. Use a comma separate values form, e.g., --background_levels 1,2,3") | 88 default=None, |
89 parser.add_argument('--background_clades', | 89 type=str, |
90 default=None, | 90 required=False, |
91 type=str, | 91 help="List which levels should be highlight with a shaded background. Use a comma separate values form, e.g., " |
92 required=False, | 92 "--background_levels 1,2,3. Default is None") |
93 help="Specify the clades that should be highlight with a shaded background. Use a comma separate values form and surround the string with \" if it contains spaces. Example: --background_clades \"Bacteria.Actinobacteria, Bacteria.Bacteroidetes.Bacteroidia, Bacteria.Firmicutes.Clostridia.Clostridiales\"") | 93 parser.add_argument('--background_clades', |
94 parser.add_argument('--background_colors', | 94 default=None, |
95 default=None, | 95 type=str, |
96 type=str, | 96 required=False, |
97 required=False, | 97 help="Specify the clades that should be highlight with a shaded background. Use a comma separate values form " |
98 help="Set the color to use for the shaded background. Colors can be either in RGB or HSV (using a semi-colon to separate values, surrounded with ()) format. Use a comma separate values form and surround the string with \" if it contains spaces. Example: --background_colors \"#29cc36, (150; 100; 100), (280; 80; 88)\"") | 98 "and surround the string with \" if there are spaces. Example: --background_clades \"Bacteria.Actinobacteria, " |
99 # title | 99 "Bacteria.Bacteroidetes.Bacteroidia, Bacteria.Firmicutes.Clostridia.Clostridiales\". Default is None") |
100 parser.add_argument('--title', | 100 parser.add_argument('--background_colors', |
101 type=str, | 101 default=None, |
102 required=False, | 102 type=str, |
103 help="If specified set the title of the GraPhlAn plot. Surround the string with \" if it contains spaces, e.g., --title \"Title example\"") | 103 required=False, |
104 # title font size | 104 help="Set the color to use for the shaded background. Colors can be either in RGB or HSV (using a semi-colon to " |
105 parser.add_argument('--title_font_size', | 105 "separate values, surrounded with ()) format. Use a comma separate values form and surround the string with " |
106 default=15, | 106 "\" if it contains spaces. Example: --background_colors \"#29cc36, (150; 100; 100), (280; 80; 88)\". Default " |
107 type=int, | 107 "is None") |
108 required=False, | 108 # title |
109 help="Set the title font size. Default is 15") | 109 parser.add_argument('--title', |
110 # clade size | 110 type=str, |
111 parser.add_argument('--def_clade_size', | 111 required=False, |
112 default=10., | 112 help="If specified set the title of the GraPhlAn plot. Surround the string with \" if it contains spaces, e.g., " |
113 type=float, | 113 "--title \"Title example\"") |
114 required=False, | 114 # title font size |
115 help="Set a default size for clades that are not found as biomarkers by LEfSe. Default is 10") | 115 parser.add_argument('--title_font_size', |
116 parser.add_argument('--min_clade_size', | 116 default=15, |
117 default=20., | 117 type=int, |
118 type=float, | 118 required=False, |
119 required=False, | 119 help="Set the title font size. Default is 15") |
120 help="Set the minimum value of clades that are biomarkers. Default is 20") | 120 # clade size |
121 parser.add_argument('--max_clade_size', | 121 parser.add_argument('--def_clade_size', |
122 default=200., | 122 default=10., |
123 type=float, | 123 type=float, |
124 required=False, | 124 required=False, |
125 help="Set the maximum value of clades that are biomarkers. Default is 200") | 125 help="Set a default size for clades that are not found as biomarkers by LEfSe. Default is 10") |
126 # font size | 126 parser.add_argument('--min_clade_size', |
127 parser.add_argument('--def_font_size', | 127 default=20., |
128 default=10, | 128 type=float, |
129 type=int, | 129 required=False, |
130 required=False, | 130 help="Set the minimum value of clades that are biomarkers. Default is 20") |
131 help="Set a default font size. Default is 10") | 131 parser.add_argument('--max_clade_size', |
132 parser.add_argument('--min_font_size', | 132 default=200., |
133 default=8, | 133 type=float, |
134 type=int, | 134 required=False, |
135 required=False, | 135 help="Set the maximum value of clades that are biomarkers. Default is 200") |
136 help="Set the minimum font size to use. Default is 8") | 136 # font size |
137 parser.add_argument('--max_font_size', | 137 parser.add_argument('--def_font_size', |
138 default=12, | 138 default=10, |
139 type=int, | 139 type=int, |
140 required=False, | 140 required=False, |
141 help="Set the maximum font size. Default is 12") | 141 help="Set a default font size. Default is 10") |
142 # legend font size | 142 parser.add_argument('--min_font_size', |
143 parser.add_argument('--annotation_legend_font_size', | 143 default=8, |
144 default=10, | 144 type=int, |
145 type=int, | 145 required=False, |
146 required=False, | 146 help="Set the minimum font size to use. Default is 8") |
147 help="Set the font size for the annotation legend. Default is 10") | 147 parser.add_argument('--max_font_size', |
148 # abundance threshold | 148 default=12, |
149 parser.add_argument('--abundance_threshold', | 149 type=int, |
150 default=20., | 150 required=False, |
151 type=float, | 151 help="Set the maximum font size. Default is 12") |
152 required=False, | 152 # legend font size |
153 help="Set the minimun abundace value for a clade to be annotated. Default is 20.0") | 153 parser.add_argument('--annotation_legend_font_size', |
154 # ONLY lefse_input provided | 154 default=10, |
155 parser.add_argument('--most_abundant', | 155 type=int, |
156 default=10, | 156 required=False, |
157 type=int, | 157 help="Set the font size for the annotation legend. Default is 10") |
158 required=False, | 158 # abundance threshold |
159 help="When only lefse_input is provided, you can specify how many clades highlight. Since the biomarkers are missing, they will be chosen from the most abundant") | 159 parser.add_argument('--abundance_threshold', |
160 parser.add_argument('--least_biomarkers', | 160 default=20., |
161 default=3, | 161 type=float, |
162 type=int, | 162 required=False, |
163 required=False, | 163 help="Set the minimun abundace value for a clade to be annotated. Default is 20.0") |
164 help="When only lefse_input is provided, you can specify the minimum number of biomarkers to extract. The taxonomy is parsed, and the level is choosen in order to have at least the specified number of biomarkers") | 164 # ONLY lefse_input provided |
165 # decide to keep the OTU id or to merger at the above taxonomic level | 165 parser.add_argument('--most_abundant', |
166 parser.add_argument('--discard_otus', | 166 default=10, |
167 default=True, | 167 type=int, |
168 action='store_false', | 168 required=False, |
169 help="If specified the OTU ids will be discarde from the taxonmy. Default behavior keep OTU ids in taxonomy") | 169 help="When only lefse_input is provided, you can specify how many clades highlight. Since the biomarkers are " |
170 | 170 "missing, they will be chosen from the most abundant. Default is 10") |
171 DataMatrix.input_parameters(parser) | 171 parser.add_argument('--least_biomarkers', |
172 args = parser.parse_args() | 172 default=3, |
173 | 173 type=int, |
174 # check if at least one of the input params is given | 174 required=False, |
175 if (not args.lefse_input) and (not args.lefse_output) : | 175 help="When only lefse_input is provided, you can specify the minimum number of biomarkers to extract. The " |
176 raise Exception("[read_params()] You must provide at least one of the two input parameters: ") | 176 "taxonomy is parsed, and the level is choosen in order to have at least the specified number of biomarkers. " |
177 | 177 "Default is 3") |
178 # check that min_clade_size is less than max_clade_size | 178 # decide to keep the OTU id or to merger at the above taxonomic level |
179 if args.min_clade_size > args.max_clade_size : | 179 parser.add_argument('--discard_otus', |
180 print "[W] min_clade_size cannot be greater than max_clade_size, assigning their default values" | 180 default=True, |
181 args.min_clade_size = 20. | 181 action='store_false', |
182 args.max_clade_size = 200. | 182 help="If specified the OTU ids will be discarde from the taxonmy. Default is True, i.e. keep OTUs IDs in taxonomy") |
183 | 183 # decide to keep the OTU id or to merger at the above taxonomic level |
184 # check that min_font_size is less than max_font_size | 184 parser.add_argument('--internal_levels', |
185 if args.min_font_size > args.max_font_size : | 185 default=False, |
186 print "[W] min_font_size cannot be greater than max_font_size, assigning their default values" | 186 action='store_true', |
187 args.min_font_size = 8 | 187 help="If specified sum-up from leaf to root the abundances values. Default is False, i.e. do not sum-up abundances " |
188 args.max_font_size = 12 | 188 "on the internal nodes") |
189 | 189 |
190 return args | 190 DataMatrix.input_parameters(parser) |
191 args = parser.parse_args() | |
192 | |
193 # check if at least one of the input params is given | |
194 if (not args.lefse_input) and (not args.lefse_output): | |
195 raise Exception("[read_params()] You must provide at least one of the two input parameters: ") | |
196 | |
197 # check that min_clade_size is less than max_clade_size | |
198 if args.min_clade_size > args.max_clade_size: | |
199 print "[W] min_clade_size cannot be greater than max_clade_size, assigning their default values" | |
200 args.min_clade_size = 20. | |
201 args.max_clade_size = 200. | |
202 | |
203 # check that min_font_size is less than max_font_size | |
204 if args.min_font_size > args.max_font_size: | |
205 print "[W] min_font_size cannot be greater than max_font_size, assigning their default values" | |
206 args.min_font_size = 8 | |
207 args.max_font_size = 12 | |
208 | |
209 return args | |
191 | 210 |
192 | 211 |
193 def get_file_type(filename): | 212 def get_file_type(filename): |
194 """ | 213 """ |
195 Return the extension (if any) of the ``filename`` in lower case. | 214 Return the extension (if any) of the ``filename`` in lower case. |
196 """ | 215 """ |
197 return filename[filename.rfind('.')+1:].lower() | 216 return filename[filename.rfind('.')+1:].lower() |
198 | 217 |
199 | 218 |
200 def parse_biom(filename, keep_otus=True): | 219 def parse_biom(filename, keep_otus=True, internal_levels=False): |
201 """ | 220 """ |
202 Load a biom table and extract the taxonomy (from metadata), removing the unuseful header. | 221 Load a biom table and extract the taxonomy (from metadata), removing the unuseful header. |
203 Return the input biom in tab-separated format. | 222 Return the input biom in tab-separated format. |
204 """ | 223 """ |
205 from biom import load_table # avoid to ask for the BIOM library if there is no biom file | 224 from biom import load_table # avoid to ask for the BIOM library if there is no biom file |
206 | 225 |
207 biom_table = load_table(filename) | 226 biom_table = load_table(filename) |
208 strs = biom_table.delimited_self(header_value='TAXA', header_key='taxonomy') | 227 strs = biom_table.delimited_self(header_value='TAXA', header_key='taxonomy') |
209 lst1 = [str(s) for s in strs.split('\n')[1:]] # skip the "# Constructed from biom file" entry | 228 lst1 = [str(s) for s in strs.split('\n')[1:]] # skip the "# Constructed from biom file" entry |
210 biom_file = [] | 229 biom_file = [] |
211 out = [lst1[0]] # save the header | 230 out = [lst1[0]] # save the header |
212 pre_taxa = compile(".__") | 231 pre_taxa = compile(".__") |
213 classs = compile("\(class\)") | 232 classs = compile("\(class\)") |
214 | 233 |
215 # consistency check | 234 # consistency check |
216 i = 0 | 235 i = 0 |
217 while i < (len(lst1)-1): | 236 while i < (len(lst1)-1): |
218 if len([s for s in lst1[i].split('\t')]) != len([s for s in lst1[i+1].split('\t')]) : | 237 if len([s for s in lst1[i].split('\t')]) != len([s for s in lst1[i+1].split('\t')]): |
219 raise Exception('[parse_biom()] It seems that taxonomic metadata are missing, maybe is the wrong biom file?') | 238 raise Exception('[parse_biom()] It seems that taxonomic metadata are missing, maybe is the wrong biom file?') |
220 | 239 |
221 i += 1 | 240 i += 1 |
222 | 241 |
223 for l in lst1[1:]: | 242 for l in lst1[1:]: |
224 otu = None | 243 otu = None |
225 lst = [str(s).strip() for s in l.split('\t')[1:-1]] | 244 lst = [float(s.strip()) for s in l.split('\t')[1:-1]] |
226 | 245 |
227 if keep_otus: | 246 if keep_otus: |
228 otu = l.split('\t')[0] | 247 otu = l.split('\t')[0] |
229 | 248 |
230 # Clean an move taxa in first place | 249 # Clean and move taxa in first place |
231 taxa = '.'.join([s.strip().replace('[', '').replace('u\'', '').replace(']', '').replace(' ', '').replace('\'', '') for s in l.split('\t')[-1].split(',')]) | 250 taxa = '.'.join([s.strip().replace('[', '').replace('u\'', '').replace(']', '').replace(' ', '').replace('\'', '') |
232 taxa = pre_taxa.sub('', taxa) # remove '{k|p|o|g|s}__' | 251 for s in l.split('\t')[-1].split(',')]) |
233 taxa = classs.sub('', taxa) # remove '(class)' | 252 taxa = pre_taxa.sub('', taxa) # remove '{k|p|o|g|s}__' |
234 taxa = taxa.rstrip('.') # remove trailing dots | 253 taxa = classs.sub('', taxa) # remove '(class)' |
235 | 254 taxa = taxa.rstrip('.') # remove trailing dots |
236 if otu: | 255 |
237 taxa = taxa + '.' + otu | 256 if otu: |
238 | 257 taxa = taxa + '.' + otu |
239 biom_file.append([taxa] + lst) | 258 |
240 | 259 biom_file.append([taxa] + lst) |
241 # merge such rows that have the same taxa | 260 |
242 i = 1 | 261 # merge such rows that have the same taxa |
243 dic = {} | 262 i = 1 |
244 | 263 dic = {} |
245 for l in biom_file[i:]: | 264 |
246 for k in biom_file[i+1:]: | 265 for l in biom_file[i:]: |
247 if l[0] == k[0]: | 266 if l[0] not in dic: |
248 lst = [] | 267 dic[l[0]] = l[1:] |
249 j = 1 | 268 |
250 | 269 for k in biom_file[i+1:]: |
251 while j < len(l): | 270 if l[0] == k[0]: |
252 lst.append(float(l[j]) + float(k[j])) | 271 lst = [] |
253 j += 1 | 272 lstdic = dic[l[0]] |
254 | 273 j = 1 |
255 if l[0] in dic: | 274 while j < len(lstdic): |
256 lst1 = dic[l[0]] | 275 lst.append(float(lstdic[j]) + float(k[j])) |
257 j = 0 | 276 j += 1 |
258 lst2 = [] | 277 |
259 | 278 dic[l[0]] = lst |
260 while j < len(lst1): | 279 i += 1 |
261 lst2.append(float(lst1[j]) + float(lst[j])) | 280 |
262 j += 1 | 281 feats = dict(dic) |
263 | 282 |
264 lst = lst2 | 283 if internal_levels: |
265 | 284 feats = add_missing_levels(feats) |
266 dic[l[0]] = lst | 285 |
267 # if not in dic, add it! | 286 for k in feats: |
268 if l[0] not in dic: | 287 out.append('\t'.join([str(s) for s in [k] + feats[k]])) |
269 dic[l[0]] = l[1:] | 288 |
270 | 289 return '\n'.join(out) |
271 i += 1 | 290 |
272 | 291 |
273 for k in dic: | 292 def add_missing_levels(ff, summ=True): |
274 out.append('\t'.join([str(s) for s in [k] + dic[k]])) | 293 """ |
275 | 294 Sum-up the internal abundances from leaf to root |
276 return '\n'.join(out) | 295 """ |
296 if sum([f.count(".") for f in ff]) < 1: | |
297 return ff | |
298 | |
299 clades2leaves = {} | |
300 for f in ff: | |
301 fs = f.split(".") | |
302 | |
303 if len(fs) < 2: | |
304 continue | |
305 | |
306 for l in range(1, len(fs)+1): | |
307 n = ".".join(fs[:l]) | |
308 | |
309 if n in clades2leaves: | |
310 clades2leaves[n].append(f) | |
311 else: | |
312 clades2leaves[n] = [f] | |
313 | |
314 ret = {} | |
315 for k in clades2leaves: | |
316 if summ: | |
317 ret[k] = [sum([sum(ff[e]) for e in clades2leaves[k]])] | |
318 else: | |
319 lst = [] | |
320 for e in clades2leaves[k]: | |
321 if not lst: | |
322 for i in ff[e]: | |
323 lst.append(i) | |
324 else: | |
325 lst1 = [] | |
326 i = 0 | |
327 while i < len(lst): | |
328 lst1.append(lst[i] + ff[e][i]) | |
329 i += 1 | |
330 lst = lst1 | |
331 | |
332 ret[k] = lst | |
333 | |
334 return ret | |
277 | 335 |
278 | 336 |
279 def get_most_abundant(abundances, xxx): | 337 def get_most_abundant(abundances, xxx): |
280 """ | 338 """ |
281 Sort by the abundance level all the taxonomy that represent at least two levels. | 339 Sort by the abundance level all the taxonomy that represent at least two levels. |
282 Return the first ``xxx`` most abundant. | 340 Return the first ``xxx`` most abundant. |
283 """ | 341 """ |
284 abundant = [] | 342 abundant = [] |
285 | 343 |
286 for a in abundances : | 344 for a in abundances: |
287 if a.count('|') > 0: | 345 if a.count('|') > 0: |
288 abundant.append((float(abundances[a]), a.replace('|', '.'))) | 346 abundant.append((float(abundances[a]), a.replace('|', '.'))) |
289 elif a.count('.') > 0: | 347 elif a.count('.') > 0: |
290 abundant.append((float(abundances[a]), a)) | 348 abundant.append((float(abundances[a]), a)) |
291 | 349 |
292 abundant.sort(reverse=True) | 350 abundant.sort(reverse=True) |
293 return abundant[:xxx] | 351 return abundant[:xxx] |
294 | 352 |
295 | 353 |
296 def get_biomarkes(abundant, xxx): | 354 def get_biomarkes(abundant, xxx): |
297 """ | 355 """ |
298 Split the taxonomy and then look, level by level, when there are at least ``xxx`` distinct branches. | 356 Split the taxonomy and then look, level by level, when there are at least ``xxx`` distinct branches. |
299 Return the set of branches as biomarkers to highlight. | 357 Return the set of branches as biomarkers to highlight. |
300 """ | 358 """ |
301 cc = [] | 359 cc = [] |
302 bk = set() | 360 bk = set() |
303 lvl = 0 | 361 lvl = 0 |
304 | 362 |
305 for _, t in abundant: | 363 for _, t in abundant: |
306 cc.append(t.split('.')) | 364 cc.append(t.split('.')) |
307 | 365 |
308 while lvl < len(max(cc)): | 366 while lvl < len(max(cc)): |
309 bk = set() | 367 bk = set() |
310 | 368 |
311 for c in cc: | 369 for c in cc: |
312 if lvl < len(c): | 370 if lvl < len(c): |
313 bk |= set([c[lvl]]) | 371 bk |= set([c[lvl]]) |
314 | 372 |
315 if len(bk) >= xxx: | 373 if len(bk) >= xxx: |
316 break | 374 break |
317 | 375 |
318 lvl += 1 | 376 lvl += 1 |
319 | 377 |
320 return bk | 378 return bk |
321 | 379 |
322 def scale_clade_size(minn, maxx, abu, max_abu): | 380 def scale_clade_size(minn, maxx, abu, max_abu): |
323 """ | 381 """ |
324 Return the value of ``abu`` scaled to ``max_abu`` logarithmically, and then map from ``minn`` to ``maxx``. | 382 Return the value of ``abu`` scaled to ``max_abu`` logarithmically, and then map from ``minn`` to ``maxx``. |
325 """ | 383 """ |
326 return minn + maxx*log(1. + (abu/max_abu)) | 384 return minn + maxx*log(1. + (abu/max_abu)) |
327 | 385 |
328 | 386 |
329 def main(): | 387 def main(): |
330 """ | 388 """ |
331 """ | 389 """ |
332 colors = [(245., 90., 100.), (125., 80., 80.), (0., 80., 100.), (195., 100., 100.), (150., 100., 100.), (55., 100., 100.), (280., 80., 88.)] # HSV format | 390 colors = [(245., 90., 100.), (125., 80., 80.), (0., 80., 100.), (195., 100., 100.), |
333 args = read_params() | 391 (150., 100., 100.), (55., 100., 100.), (280., 80., 88.)] # HSV format |
334 lefse_input = None | 392 args = read_params() |
335 lefse_output = {} | 393 lefse_input = None |
336 color = {} | 394 lefse_output = {} |
337 biomarkers = set() | 395 color = {} |
338 taxa = [] | 396 biomarkers = set() |
339 abundances = {} | 397 taxa = [] |
340 max_abundances = None | 398 abundances = {} |
341 max_effect_size = None | 399 max_abundances = None |
342 max_log_effect_size = None | 400 max_effect_size = None |
343 background_list = [] | 401 max_log_effect_size = None |
344 background_clades = [] | 402 background_list = [] |
345 background_colors = {} | 403 background_clades = [] |
346 annotations_list = [] | 404 background_colors = {} |
347 external_annotations_list = [] | 405 annotations_list = [] |
348 lin = False | 406 external_annotations_list = [] |
349 lout = False | 407 lin = False |
350 | 408 lout = False |
351 # get the levels that should be shaded | 409 |
352 if args.background_levels : | 410 # get the levels that should be shaded |
353 background_list = [int(i.strip()) for i in args.background_levels.strip().split(',')] | 411 if args.background_levels: |
354 | 412 background_list = [int(i.strip()) for i in args.background_levels.strip().split(',')] |
355 # get the background_clades | 413 |
356 if args.background_clades : | 414 # get the background_clades |
357 if get_file_type(args.background_colors) in ['txt'] : | 415 if args.background_clades: |
358 with open(args.background_clades, 'r') as f: | 416 if get_file_type(args.background_colors) in ['txt']: |
359 background_clades = [str(s.strip()) for s in f] | 417 with open(args.background_clades, 'r') as f: |
360 else : # it's a string in csv format | 418 background_clades = [str(s.strip()) for s in f] |
361 background_clades = [str(s.strip()) for s in args.background_clades.split(',')] | 419 else: # it's a string in csv format |
362 | 420 background_clades = [str(s.strip()) for s in args.background_clades.split(',')] |
363 # read the set of colors to use for the background_clades | 421 |
364 if args.background_colors : | 422 # read the set of colors to use for the background_clades |
365 col = [] | 423 if args.background_colors: |
366 | 424 col = [] |
367 if get_file_type(args.background_colors) in ['txt'] : | 425 |
368 with open(args.background_colors, 'r') as f : | 426 if get_file_type(args.background_colors) in ['txt']: |
369 col = [str(s.strip()) for s in f] | 427 with open(args.background_colors, 'r') as f: |
370 else : # it's a string in csv format | 428 col = [str(s.strip()) for s in f] |
371 col = [c.strip() for c in args.background_colors.split(',')] | 429 else: # it's a string in csv format |
372 | 430 col = [c.strip() for c in args.background_colors.split(',')] |
373 lst = {} | 431 |
374 i = 0 | 432 lst = {} |
375 | 433 i = 0 |
376 for c in background_clades : | 434 |
377 cc = c[:c.find('.')] | 435 for c in background_clades: |
378 | 436 cc = c[:c.find('.')] |
379 if cc not in lst : | 437 |
380 background_colors[c] = col[i % len(col)] | 438 if cc not in lst: |
381 lst[cc] = col[i % len(col)] | 439 background_colors[c] = col[i % len(col)] |
382 i += 1 | 440 lst[cc] = col[i % len(col)] |
383 else : | 441 i += 1 |
384 background_colors[c] = lst[cc] | 442 else: |
385 | 443 background_colors[c] = lst[cc] |
386 # get the levels that will use the internal annotation | 444 |
387 if args.annotations : | 445 # get the levels that will use the internal annotation |
388 annotations_list = [int(i.strip()) for i in args.annotations.strip().split(',')] | 446 if args.annotations: |
389 | 447 annotations_list = [int(i.strip()) for i in args.annotations.strip().split(',')] |
390 # get the levels that will use the external legend annotation | 448 |
391 if args.external_annotations : | 449 # get the levels that will use the external legend annotation |
392 external_annotations_list = [int(i.strip()) for i in args.external_annotations.strip().split(',')] | 450 if args.external_annotations: |
393 | 451 external_annotations_list = [int(i.strip()) for i in args.external_annotations.strip().split(',')] |
394 if args.lefse_input : | 452 |
395 # if the lefse_input is in biom format, convert it | 453 if args.lefse_input: |
396 if get_file_type(args.lefse_input) in 'biom' : | 454 # if the lefse_input is in biom format, convert it |
397 try : | 455 if get_file_type(args.lefse_input) in 'biom': |
398 biom = parse_biom(args.lefse_input, args.discard_otus) | 456 try: |
399 lefse_input = DataMatrix(StringIO(biom), args) | 457 biom = parse_biom(args.lefse_input, args.discard_otus, args.internal_levels) |
400 except Exception as e : | 458 lefse_input = DataMatrix(StringIO(biom), args) |
401 lin = True | 459 except Exception as e: |
402 print e | 460 lin = True |
403 else : | 461 print e |
404 lefse_input = DataMatrix(args.lefse_input, args) | 462 else: |
405 | 463 if args.internal_levels: |
406 if not lin : | 464 aaa = {} |
407 taxa = [t.replace('|', '.') for t in lefse_input.get_fnames()] # build taxonomy list | 465 header = None |
408 abundances = dict(lefse_input.get_averages()) | 466 with open(args.lefse_input, 'r') as f: |
409 max_abundances = max([abundances[x] for x in abundances]) | 467 for r in f: |
410 else : # no lefse_input provided | 468 if header is None: |
411 lin = True | 469 header = [s.strip() for s in r.split('\t')] |
412 | 470 else: |
413 if args.lefse_output : | 471 row = r.split('\t') |
414 # if the lefse_output is in biom format... I don't think it's possible! | 472 aaa[row[0].strip().replace('|', '.')] = [float(s.strip()) for s in row[1:]] |
415 if get_file_type(args.lefse_output) in 'biom' : | 473 |
416 lout = True | 474 feats = add_missing_levels(aaa, summ=False) |
417 print "Seriously?? LEfSe output file is not expected to be in biom format!" | 475 ss = '\t'.join(header) |
418 else : | 476 ss += '\n' |
419 lst = [] | 477 ss += '\n'.join(['\t'.join([str(s) for s in [k] + feats[k]]) for k in feats]) |
420 | 478 lefse_input = DataMatrix(StringIO(ss), args) |
421 with open(args.lefse_output, 'r') as out_file : | 479 else: |
422 for line in out_file : | 480 lefse_input = DataMatrix(args.lefse_input, args) |
423 t, m, bk, es, pv = line.strip().split('\t') | 481 |
424 lefse_output[t] = (es, bk, m, pv) | 482 if not lin: |
425 | 483 taxa = [t.replace('|', '.').strip() for t in lefse_input.get_fnames()] # build taxonomy list |
426 # get distinct biomarkers | 484 abundances = dict(lefse_input.get_averages()) |
427 if bk : | 485 max_abundances = max([abundances[x] for x in abundances]) |
428 biomarkers |= set([bk]) | 486 else: # no lefse_input provided |
429 | 487 lin = True |
430 # get all effect size | 488 |
431 if es : | 489 if args.lefse_output: |
432 lst.append(float(es)) | 490 # if the lefse_output is in biom format... I don't think it's possible! |
433 | 491 if get_file_type(args.lefse_output) in 'biom': |
434 max_effect_size = max(lst) | 492 lout = True |
435 | 493 print "Seriously?? LEfSe output file is not expected to be in biom format!" |
436 # no lefse_input file provided! | 494 else: |
437 if (not taxa) and (not abundances) : # build taxonomy list and abundaces map | 495 lst = [] |
438 for t in lefse_output : | 496 |
439 _, _, m, _ = lefse_output[t] | 497 with open(args.lefse_output, 'r') as out_file: |
440 abundances[t.replace('.', '|')] = float(m) | 498 for line in out_file: |
441 | 499 t, m, bk, es, pv = line.strip().split('\t') |
442 max_abundances = max([abundances[x] for x in abundances]) | 500 lefse_output[t] = (es, bk, m, pv) |
443 | 501 |
444 for t in lefse_output : | 502 # get distinct biomarkers |
445 scaled = scale_clade_size(args.min_clade_size, args.max_clade_size, abundances[t.replace('.', '|')], max_abundances) | 503 if bk: |
446 | 504 biomarkers |= set([bk]) |
447 if scaled >= args.abundance_threshold : | 505 |
448 taxa.append(t) | 506 # get all effect size |
449 elif not lin : # no lefse_output provided and lefse_input correctly red | 507 if es: |
450 lout = True | 508 lst.append(float(es)) |
451 | 509 |
452 # find the xxx most abundant | 510 max_effect_size = max(lst) |
453 abundant = get_most_abundant(abundances, args.most_abundant) | 511 |
454 | 512 # no lefse_input file provided! |
455 # find the taxonomy level with at least yyy distinct childs from the xxx most abundant | 513 if (not taxa) and (not abundances): # build taxonomy list and abundaces map |
456 biomarkers = get_biomarkes(abundant, args.least_biomarkers) | 514 for t in lefse_output: |
457 | 515 _, _, m, _ = lefse_output[t] |
458 # compose lefse_output variable | 516 abundances[t.replace('.', '|')] = float(m) |
459 for _, t in abundant : | 517 |
460 b = '' | 518 max_abundances = max([abundances[x] for x in abundances]) |
461 | 519 |
462 for bk in biomarkers : | 520 for t in lefse_output: |
463 if bk in t : | 521 scaled = scale_clade_size(args.min_clade_size, args.max_clade_size, |
464 b = bk | 522 abundances[t.replace('.', '|')], max_abundances) |
465 | 523 |
466 lefse_output[t] = (2., b, '', '') | 524 if scaled >= args.abundance_threshold: |
467 | 525 taxa.append(t) |
468 max_effect_size = 1. # It's not gonna working | 526 elif not lin: # no lefse_output provided and lefse_input correctly red |
469 | 527 lout = True |
470 | 528 |
471 # no lefse_output and no lefse_input provided | 529 # find the xxx most abundant |
472 if lin and lout : | 530 abundant = get_most_abundant(abundances, args.most_abundant) |
473 print "You must provide at least one input file!" | 531 |
474 exit(1) | 532 # find the taxonomy level with at least yyy distinct childs from the xxx most abundant |
475 | 533 biomarkers = get_biomarkes(abundant, args.least_biomarkers) |
476 # write the tree | 534 |
477 with open(args.tree, 'w') as tree_file : | 535 # compose lefse_output variable |
478 for taxonomy in taxa : | 536 for _, t in abundant: |
479 tree_file.write(''.join([taxonomy, '\n'])) | 537 b = '' |
480 | 538 |
481 # for each biomarker assign it to a different color | 539 for bk in biomarkers: |
482 i = 0 | 540 if bk in t: |
483 | 541 b = bk |
484 for bk in biomarkers : | 542 |
485 color[bk] = i % len(colors) | 543 lefse_output[t] = (2., b, '', '') |
486 i += 1 | 544 |
487 | 545 max_effect_size = 1. # It's not gonna working |
488 # find max log abs value of effect size | 546 # no lefse_output and no lefse_input provided |
489 if lefse_output : | 547 if lin and lout: |
490 lst = [] | 548 print "You must provide at least one input file!" |
491 | 549 exit(1) |
492 for t in lefse_output : | 550 |
493 es, _, _, _ = lefse_output[t] | 551 # write the tree |
494 | 552 with open(args.tree, 'w') as tree_file: |
495 if es : | 553 tree_file.write('\n'.join(taxa)) |
496 lst.append(abs(log(float(es) / max_effect_size))) | 554 |
497 | 555 # for each biomarker assign it to a different color |
498 max_log_effect_size = max(lst) | 556 i = 0 |
499 | 557 |
500 # write the annotation | 558 for bk in biomarkers: |
501 try : | 559 color[bk] = i % len(colors) |
502 with open(args.annotation, 'w') as annot_file : | 560 i += 1 |
503 # set the title | 561 |
504 if args.title : | 562 # find max log abs value of effect size |
505 annot_file.write(''.join(['\t'.join(['title', args.title]), '\n'])) | 563 if lefse_output: |
506 annot_file.write(''.join(['\t'.join(['title_font_size', str(args.title_font_size)]), '\n'])) | 564 lst = [] |
507 | 565 |
508 # write some basic customizations | 566 for t in lefse_output: |
509 annot_file.write(''.join(['\t'.join(['clade_separation', '0.5']), '\n'])) | 567 es, _, _, _ = lefse_output[t] |
510 annot_file.write(''.join(['\t'.join(['branch_bracket_depth', '0.8']), '\n'])) | 568 |
511 annot_file.write(''.join(['\t'.join(['branch_bracket_width', '0.2']), '\n'])) | 569 if es: |
512 annot_file.write(''.join(['\t'.join(['annotation_legend_font_size', str(args.annotation_legend_font_size)]), '\n'])) | 570 lst.append(abs(log(float(es) / max_effect_size))) |
513 annot_file.write(''.join(['\t'.join(['class_legend_font_size', '10']), '\n'])) | 571 |
514 annot_file.write(''.join(['\t'.join(['class_legend_marker_size', '1.5']), '\n'])) | 572 max_log_effect_size = max(lst) |
515 | 573 |
516 # write the biomarkers' legend | 574 # write the annotation |
517 for bk in biomarkers : | 575 try: |
518 biom = bk.replace('_', ' ').upper() | 576 with open(args.annotation, 'w') as annot_file: |
519 rgb = scale_color(colors[color[bk]]) | 577 # set the title |
520 annot_file.write(''.join(['\t'.join([biom, 'annotation', biom]), '\n'])) | 578 if args.title: |
521 annot_file.write(''.join(['\t'.join([biom, 'clade_marker_color', rgb]), '\n'])) | 579 annot_file.write('\n'.join(['\t'.join(['title', args.title]), |
522 annot_file.write(''.join(['\t'.join([biom, 'clade_marker_size', '40']), '\n'])) | 580 '\t'.join(['title_font_size', str(args.title_font_size)]), '\n'])) |
523 | 581 |
524 done_clades = [] | 582 # write some basic customizations |
525 | 583 annot_file.write('\n'.join(['\t'.join(['clade_separation', '0.5']), |
526 # write the annotation for the tree | 584 '\t'.join(['branch_bracket_depth', '0.8']), |
527 for taxonomy in taxa : | 585 '\t'.join(['branch_bracket_width', '0.2']), |
528 level = taxonomy.count('.') + 1 # which level is this taxonomy? | 586 '\t'.join(['annotation_legend_font_size', str(args.annotation_legend_font_size)]), |
529 clean_taxonomy = taxonomy[taxonomy.rfind('.') + 1:] # retrieve the last level in taxonomy | 587 '\t'.join(['class_legend_font_size', '10']), |
530 scaled = args.def_clade_size | 588 '\t'.join(['class_legend_marker_size', '1.5']), '\n'])) |
531 | 589 |
532 # scaled the size of the clade by the average abundance | 590 # write the biomarkers' legend |
533 if taxonomy.replace('.', '|') in abundances : | 591 for bk in biomarkers: |
534 scaled = scale_clade_size(args.min_clade_size, args.max_clade_size, abundances[taxonomy.replace('.', '|')], max_abundances) | 592 biom = bk.replace('_', ' ').upper() |
535 | 593 rgb = scale_color(colors[color[bk]]) |
536 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'clade_marker_size', str(scaled)]), '\n'])) | 594 annot_file.write('\n'.join(['\t'.join([biom, 'annotation', biom]), |
537 | 595 '\t'.join([biom, 'clade_marker_color', rgb]), |
538 # put a bakcground annotation to the levels specified by the user | 596 '\t'.join([biom, 'clade_marker_size', '40']), '\n'])) |
539 shaded_background = [] | 597 |
540 | 598 # write the annotation for the tree |
541 for l in background_list : | 599 for taxonomy in taxa: |
542 if level >= l : | 600 level = taxonomy.count('.') + 1 # which level is this taxonomy? |
543 lst = [s.strip() for s in taxonomy.strip().split('.')] | 601 clean_taxonomy = taxonomy[taxonomy.rfind('.') + 1:] # retrieve the last level in taxonomy |
544 t = '.'.join(lst[:l]) | 602 cleanest_taxonomy = clean_taxonomy.replace('_', ' ') # substitute '_' with ' ' |
545 | 603 scaled = args.def_clade_size |
546 if t not in shaded_background : | 604 |
547 shaded_background.append(t) | 605 # scaled the size of the clade by the average abundance |
548 | 606 if (taxonomy in abundances) or (taxonomy.replace('.', '|') in abundances): |
549 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / l) | 607 try: |
550 | 608 abu = abundances[taxonomy.replace('.', '|')] |
551 annot_file.write(''.join(['\t'.join([t, 'annotation_background_color', args.background_color]), '\n'])) | 609 except: |
552 annot_file.write(''.join(['\t'.join([t, 'annotation', '*']), '\n'])) | 610 abu = abundances[taxonomy] |
553 annot_file.write(''.join(['\t'.join([t, 'annotation_font_size', str(font_size)]), '\n'])) | 611 |
554 | 612 scaled = scale_clade_size(args.min_clade_size, args.max_clade_size, abu, max_abundances) |
555 # put a bakcground annotation to the clades specified by the user | 613 |
556 for c in background_colors : | 614 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'clade_marker_size', str(scaled)]), '\n'])) |
557 bg_color = background_colors[c] | 615 |
558 | 616 # put a bakcground annotation to the levels specified by the user |
559 if not bg_color.startswith('#') : | 617 shaded_background = [] |
560 bg_color = bg_color.replace('(', '').replace(')', '') | 618 |
561 h, s, v = bg_color.split(';') | 619 for l in background_list: |
562 bg_color = scale_color((float(h.strip()) , float(s.strip()), float(v.strip()))) | 620 if level >= l: |
563 | 621 lst = [s.strip() for s in taxonomy.strip().split('.')] |
564 # check if the taxonomy has more than one level | 622 t = '.'.join(lst[:l]) |
565 lvls = [str(cc.strip()) for cc in c.split('.')] | 623 |
566 | 624 if t not in shaded_background: |
567 for l in lvls : | 625 shaded_background.append(t) |
568 if (l in taxonomy) and (l not in done_clades) : | 626 |
569 lvl = taxonomy[:taxonomy.index(l)].count('.') + 1 | 627 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / l) |
570 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / lvl) | 628 |
571 | 629 annot_file.write('\n'.join(['\t'.join([t, 'annotation_background_color', args.background_color]), |
572 | 630 '\t'.join([t, 'annotation', t.replace('_', ' ')]), |
573 annot_file.write(''.join(['\t'.join([l, 'annotation_background_color', bg_color]), '\n'])) | 631 '\t'.join([t, 'annotation_font_size', str(font_size)]), '\n'])) |
574 annot_file.write(''.join(['\t'.join([l, 'annotation', '*']), '\n'])) | 632 |
575 annot_file.write(''.join(['\t'.join([l, 'annotation_font_size', str(font_size)]), '\n'])) | 633 # put a bakcground annotation to the clades specified by the user |
576 | 634 for c in background_colors: |
577 done_clades.append(l) | 635 bg_color = background_colors[c] |
578 | 636 |
579 if lefse_output : | 637 if not bg_color.startswith('#'): |
580 if taxonomy in lefse_output : | 638 bg_color = bg_color.replace('(', '').replace(')', '') |
581 es, bk, _, _ = lefse_output[taxonomy] | 639 h, s, v = bg_color.split(';') |
582 | 640 bg_color = scale_color((float(h.strip()) , float(s.strip()), float(v.strip()))) |
583 # if it is a biomarker then color and label it! | 641 |
584 if bk : | 642 # check if the taxonomy has more than one level |
585 fac = abs(log(float(es) / max_effect_size)) / max_log_effect_size | 643 lvls = [str(cc.strip()) for cc in c.split('.')] |
586 | 644 done_clades = [] |
587 try : | 645 |
588 rgbs = scale_color(colors[color[bk]], fac) | 646 for l in lvls: |
589 except Exception as e : | 647 if (l in taxonomy) and (l not in done_clades): |
590 print e | 648 lvl = taxonomy[:taxonomy.index(l)].count('.') + 1 |
591 print ' '.join(["[W] Assign to", taxonomy, "the default color:", colors[color[bk]]]) | 649 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / lvl) |
592 rgbs = colors[color[bk]] | 650 |
593 | 651 annot_file.write('\n'.join(['\t'.join([l, 'annotation_background_color', bg_color]), |
594 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'clade_marker_color', rgbs]), '\n'])) | 652 '\t'.join([l, 'annotation', l.replace('_', ' ')]), |
595 | 653 '\t'.join([l, 'annotation_font_size', str(font_size)]), '\n'])) |
596 # write the annotation only if the abundance is above a given threshold and it is either internal or external annotation lists | 654 |
597 if (scaled >= args.abundance_threshold) and ((level in annotations_list) or (level in external_annotations_list)) : | 655 done_clades.append(l) |
598 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / level) | 656 |
599 annotation = '*' if level in annotations_list else '*:*' | 657 if lefse_output: |
600 | 658 if taxonomy in lefse_output: |
601 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'annotation_background_color', rgbs]), '\n'])) | 659 es, bk, _, _ = lefse_output[taxonomy] |
602 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'annotation', annotation]), '\n'])) | 660 |
603 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'annotation_font_size', str(font_size)]), '\n'])) | 661 # if it is a biomarker then color and label it! |
604 except Exception as e : | 662 if bk: |
605 print e | 663 fac = abs(log(float(es) / max_effect_size)) / max_log_effect_size |
606 | 664 |
607 | 665 try: |
608 if __name__ == '__main__' : | 666 rgbs = scale_color(colors[color[bk]], fac) |
609 main() | 667 except Exception as e: |
668 print e | |
669 print ' '.join(["[W] Assign to", taxonomy, "the default color:", colors[color[bk]]]) | |
670 rgbs = colors[color[bk]] | |
671 | |
672 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'clade_marker_color', rgbs]), '\n'])) | |
673 | |
674 # write the annotation only if the abundance is above a given threshold and it is either | |
675 # internal or external annotation lists | |
676 if (scaled >= args.abundance_threshold) and \ | |
677 ((level in annotations_list) or (level in external_annotations_list)): | |
678 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / level) | |
679 annotation = cleanest_taxonomy if level in annotations_list else '*:' + cleanest_taxonomy | |
680 | |
681 annot_file.write('\n'.join(['\t'.join([clean_taxonomy, 'annotation_background_color', rgbs]), | |
682 '\t'.join([clean_taxonomy, 'annotation', annotation]), | |
683 '\t'.join([clean_taxonomy, 'annotation_font_size', str(font_size)]), '\n'])) | |
684 except Exception as e: | |
685 print e | |
686 | |
687 | |
688 if __name__ == '__main__': | |
689 main() | |
690 |