annotate export2graphlan.py @ 34:ed025ebc2bdc draft

Uploaded
author george-weingart
date Fri, 05 Sep 2014 17:41:00 -0400
parents 82fb838d02dc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
1 #!/usr/bin/env python
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
2
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
3 from argparse import ArgumentParser
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
4 from colorsys import hsv_to_rgb
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
5 from math import log
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
6 from StringIO import StringIO
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
7 from re import compile
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
8 from hclust2.hclust2 import DataMatrix
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
9
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
10
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
11 __author__ = 'Francesco Asnicar'
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
12 __email__ = "francesco.asnicar@gmail.com"
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
13 __version__ = '0.17'
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
14 __date__ = '21th August 2014'
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
15
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
16
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def scale_color(hsv, factor=1.):
    """
    Convert an HSV color to an RGB hex string, dimming it by ``factor``.

    ``hsv`` is a 3-item sequence ``(h, s, v)`` with the hue in [0, 360] and
    the saturation and value in [0, 100]. ``factor`` must be in [0.0, 1.0];
    it multiplies the value (brightness) component before the conversion.

    Return a string of the form ``'#rrggbb'``.
    Raise ``Exception`` if any of the four inputs is outside its range.

    The tuple is unpacked explicitly in the body (instead of in the
    signature) because tuple parameters are a syntax error on Python 3;
    callers still pass a single tuple as the first positional argument.
    """
    h, s, v = hsv

    if (h < 0.) or (h > 360.):
        raise Exception('[scale_color()] Hue value out of range (0, 360): ' + str(h))

    if (s < 0.) or (s > 100.):
        raise Exception('[scale_color()] Saturation value out of range (0, 100): ' + str(s))

    if (v < 0.) or (v > 100.):
        raise Exception('[scale_color()] Value value out of range (0, 100): ' + str(v))

    if (factor < 0.) or (factor > 1.):
        raise Exception('[scale_color()] Factor value out of range (0.0, 1.0): ' + str(factor))

    # only the brightness is scaled; hue and saturation are preserved
    v *= factor

    # colorsys works on components normalised to [0, 1]
    r, g, b = hsv_to_rgb(h/360., s/100., v/100.)

    return '#{0:02x}{1:02x}{2:02x}'.format(int(round(r*255.)), int(round(g*255.)), int(round(b*255.)))
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
38
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
39
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def read_params():
    """
    Build the command-line parser, parse the arguments and validate them.

    Besides the options declared here, ``DataMatrix.input_parameters(parser)``
    is called on the parser before parsing (presumably to register the
    hclust2 data-matrix options -- verify against hclust2).

    Return the parsed arguments (an ``argparse.Namespace``).
    Raise ``Exception`` if neither ``--lefse_input`` nor ``--lefse_output``
    is given.

    If ``min_clade_size > max_clade_size`` (or ``min_font_size >
    max_font_size``) a warning is printed and both values of the pair are
    reset to their declared defaults.
    """
    parser = ArgumentParser(description="export2graphlan.py (ver. " + __version__ +
        " of " + __date__ + "). Convert MetaPhlAn, LEfSe, and/or HUMAnN output to GraPhlAn input format. Authors: "
        + __author__ + " (" + __email__ + ")")

    # input parameters group
    group = parser.add_argument_group(title='input parameters',
        description="You need to provide at least one of the two arguments")
    group.add_argument('-i', '--lefse_input',
        type=str,
        required=False,
        help="LEfSe input data. A file that can be given to LEfSe for biomarkers analysis. It can be the result of a "
            "MetaPhlAn or HUMAnN analysis")
    group.add_argument('-o', '--lefse_output',
        type=str,
        required=False,
        help="LEfSe output result data. The result of LEfSe analysis performed on the lefse_input file")

    # output parameters group
    group = parser.add_argument_group(title='output parameters')
    group.add_argument('-t', '--tree',
        type=str,
        required=True,
        help="Output filename where save the input tree for GraPhlAn")
    group.add_argument('-a', '--annotation',
        type=str,
        required=True,
        help="Output filename where save GraPhlAn annotation")

    # annotations
    parser.add_argument('--annotations',
        default=None,
        type=str,
        required=False,
        help="List which levels should be annotated in the tree. Use a comma separate values form, e.g., "
            "--annotations 1,2,3. Default is None")
    parser.add_argument('--external_annotations',
        default=None,
        type=str,
        required=False,
        help="List which levels should use the external legend for the annotation. Use a comma separate values form, "
            "e.g., --external_annotations 1,2,3. Default is None")
    # shaded background
    parser.add_argument('--background_levels',
        default=None,
        type=str,
        required=False,
        help="List which levels should be highlight with a shaded background. Use a comma separate values form, e.g., "
            "--background_levels 1,2,3. Default is None")
    parser.add_argument('--background_clades',
        default=None,
        type=str,
        required=False,
        help="Specify the clades that should be highlight with a shaded background. Use a comma separate values form "
            "and surround the string with \" if there are spaces. Example: --background_clades \"Bacteria.Actinobacteria, "
            "Bacteria.Bacteroidetes.Bacteroidia, Bacteria.Firmicutes.Clostridia.Clostridiales\". Default is None")
    parser.add_argument('--background_colors',
        default=None,
        type=str,
        required=False,
        help="Set the color to use for the shaded background. Colors can be either in RGB or HSV (using a semi-colon to "
            "separate values, surrounded with ()) format. Use a comma separate values form and surround the string with "
            "\" if it contains spaces. Example: --background_colors \"#29cc36, (150; 100; 100), (280; 80; 88)\". Default "
            "is None")
    # title
    parser.add_argument('--title',
        type=str,
        required=False,
        help="If specified set the title of the GraPhlAn plot. Surround the string with \" if it contains spaces, e.g., "
            "--title \"Title example\"")
    # title font size
    parser.add_argument('--title_font_size',
        default=15,
        type=int,
        required=False,
        help="Set the title font size. Default is 15")
    # clade size
    parser.add_argument('--def_clade_size',
        default=10.,
        type=float,
        required=False,
        help="Set a default size for clades that are not found as biomarkers by LEfSe. Default is 10")
    parser.add_argument('--min_clade_size',
        default=20.,
        type=float,
        required=False,
        help="Set the minimum value of clades that are biomarkers. Default is 20")
    parser.add_argument('--max_clade_size',
        default=200.,
        type=float,
        required=False,
        help="Set the maximum value of clades that are biomarkers. Default is 200")
    # font size
    parser.add_argument('--def_font_size',
        default=10,
        type=int,
        required=False,
        help="Set a default font size. Default is 10")
    parser.add_argument('--min_font_size',
        default=8,
        type=int,
        required=False,
        help="Set the minimum font size to use. Default is 8")
    parser.add_argument('--max_font_size',
        default=12,
        type=int,
        required=False,
        help="Set the maximum font size. Default is 12")
    # legend font size
    parser.add_argument('--annotation_legend_font_size',
        default=10,
        type=int,
        required=False,
        help="Set the font size for the annotation legend. Default is 10")
    # abundance threshold
    parser.add_argument('--abundance_threshold',
        default=20.,
        type=float,
        required=False,
        help="Set the minimun abundace value for a clade to be annotated. Default is 20.0")
    # ONLY lefse_input provided
    parser.add_argument('--most_abundant',
        default=10,
        type=int,
        required=False,
        help="When only lefse_input is provided, you can specify how many clades highlight. Since the biomarkers are "
            "missing, they will be chosen from the most abundant. Default is 10")
    parser.add_argument('--least_biomarkers',
        default=3,
        type=int,
        required=False,
        help="When only lefse_input is provided, you can specify the minimum number of biomarkers to extract. The "
            "taxonomy is parsed, and the level is choosen in order to have at least the specified number of biomarkers. "
            "Default is 3")
    # decide to keep the OTU id or to merger at the above taxonomic level
    # NOTE: the flag is stored inverted -- args.discard_otus is True (keep the
    # OTU ids) unless --discard_otus is passed, which sets it to False
    parser.add_argument('--discard_otus',
        default=True,
        action='store_false',
        help="If specified the OTU ids will be discarde from the taxonmy. Default is True, i.e. keep OTUs IDs in taxonomy")
    # decide whether to sum-up the abundances on the internal nodes
    parser.add_argument('--internal_levels',
        default=False,
        action='store_true',
        help="If specified sum-up from leaf to root the abundances values. Default is False, i.e. do not sum-up abundances "
            "on the internal nodes")

    DataMatrix.input_parameters(parser)
    args = parser.parse_args()

    # check if at least one of the input params is given
    if (not args.lefse_input) and (not args.lefse_output):
        raise Exception("[read_params()] You must provide at least one of the two input parameters: "
            "--lefse_input (-i), --lefse_output (-o)")

    # check that min_clade_size is less than max_clade_size
    if args.min_clade_size > args.max_clade_size:
        # single-argument print(...) behaves the same on Python 2 and 3
        print("[W] min_clade_size cannot be greater than max_clade_size, assigning their default values")
        # fall back to the defaults declared above rather than repeating the literals
        args.min_clade_size = parser.get_default('min_clade_size')
        args.max_clade_size = parser.get_default('max_clade_size')

    # check that min_font_size is less than max_font_size
    if args.min_font_size > args.max_font_size:
        print("[W] min_font_size cannot be greater than max_font_size, assigning their default values")
        args.min_font_size = parser.get_default('min_font_size')
        args.max_font_size = parser.get_default('max_font_size')

    return args
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
210
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
211
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def get_file_type(filename):
    """
    Return, in lower case, whatever follows the last '.' in ``filename``.

    When ``filename`` contains no dot at all, the whole name is returned
    (lower-cased); when it ends with a dot, the empty string is returned.
    """
    last_dot = filename.rfind('.')  # -1 when there is no dot, so the slice below starts at 0
    suffix = filename[last_dot + 1:]
    return suffix.lower()
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
217
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
218
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
219 def parse_biom(filename, keep_otus=True, internal_levels=False):
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
220 """
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
221 Load a biom table and extract the taxonomy (from metadata), removing the unuseful header.
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
222 Return the input biom in tab-separated format.
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
223 """
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
224 from biom import load_table # avoid to ask for the BIOM library if there is no biom file
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
225
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
226 biom_table = load_table(filename)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
227 strs = biom_table.delimited_self(header_value='TAXA', header_key='taxonomy')
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
228 lst1 = [str(s) for s in strs.split('\n')[1:]] # skip the "# Constructed from biom file" entry
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
229 biom_file = []
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
230 out = [lst1[0]] # save the header
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
231 pre_taxa = compile(".__")
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
232 classs = compile("\(class\)")
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
233
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
234 # consistency check
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
235 i = 0
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
236 while i < (len(lst1)-1):
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
237 if len([s for s in lst1[i].split('\t')]) != len([s for s in lst1[i+1].split('\t')]):
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
238 raise Exception('[parse_biom()] It seems that taxonomic metadata are missing, maybe is the wrong biom file?')
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
239
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
240 i += 1
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
241
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
242 for l in lst1[1:]:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
243 otu = None
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
244 lst = [float(s.strip()) for s in l.split('\t')[1:-1]]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
245
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
246 if keep_otus:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
247 otu = l.split('\t')[0]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
248
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
249 # Clean and move taxa in first place
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
250 taxa = '.'.join([s.strip().replace('[', '').replace('u\'', '').replace(']', '').replace(' ', '').replace('\'', '')
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
251 for s in l.split('\t')[-1].split(',')])
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
252 taxa = pre_taxa.sub('', taxa) # remove '{k|p|o|g|s}__'
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
253 taxa = classs.sub('', taxa) # remove '(class)'
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
254 taxa = taxa.rstrip('.') # remove trailing dots
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
255
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
256 if otu:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
257 taxa = taxa + '.' + otu
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
258
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
259 biom_file.append([taxa] + lst)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
260
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
261 # merge such rows that have the same taxa
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
262 i = 1
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
263 dic = {}
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
264
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
265 for l in biom_file[i:]:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
266 if l[0] not in dic:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
267 dic[l[0]] = l[1:]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
268
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
269 for k in biom_file[i+1:]:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
270 if l[0] == k[0]:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
271 lst = []
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
272 lstdic = dic[l[0]]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
273 j = 1
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
274 while j < len(lstdic):
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
275 lst.append(float(lstdic[j]) + float(k[j]))
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
276 j += 1
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
277
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
278 dic[l[0]] = lst
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
279 i += 1
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
280
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
281 feats = dict(dic)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
282
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
283 if internal_levels:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
284 feats = add_missing_levels(feats)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
285
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
286 for k in feats:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
287 out.append('\t'.join([str(s) for s in [k] + feats[k]]))
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
288
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
289 return '\n'.join(out)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
290
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
291
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
def add_missing_levels(ff, summ=True):
    """
    Derive an abundance entry for every clade prefix of the dotted feature names.

    ``ff`` maps feature names (levels separated by '.') to lists of numbers.
    If no name contains a '.', ``ff`` itself is returned untouched.
    Otherwise a new dict is returned whose keys are all the prefixes
    ('A', 'A.B', 'A.B.C', ...) of the names that have at least two levels;
    names made of a single level are not used as leaves.

    With ``summ`` true each value is a one-element list holding the grand
    total of all the numbers of the leaves below that clade; with ``summ``
    false each value is the position-by-position sum of the leaves' lists.
    """
    if not any("." in name for name in ff):
        return ff

    # clade prefix -> names of all the features that fall under it
    leaves_by_clade = {}
    for name in ff:
        levels = name.split(".")

        if len(levels) >= 2:
            for depth in range(1, len(levels) + 1):
                clade = ".".join(levels[:depth])
                leaves_by_clade.setdefault(clade, []).append(name)

    totals = {}
    for clade in leaves_by_clade:
        leaves = leaves_by_clade[clade]

        if summ:
            totals[clade] = [sum([sum(ff[leaf]) for leaf in leaves])]
            continue

        acc = []
        for leaf in leaves:
            values = ff[leaf]

            if acc:
                # the accumulator's length drives the sum: a shorter leaf
                # vector raises IndexError, extra trailing values are ignored
                acc = [cur + values[idx] for idx, cur in enumerate(acc)]
            else:
                acc = list(values)

        totals[clade] = acc

    return totals
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
335
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
336
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def get_most_abundant(abundances, xxx):
    """
    Rank the taxonomies that span at least two levels by abundance.

    ``abundances`` maps a taxonomy string (levels separated by '|' or '.')
    to its abundance. Names without any separator are ignored. Returns the
    first ``xxx`` ``(abundance, name)`` tuples in decreasing order, with
    every '|' in the name replaced by '.'.
    """
    ranked = []

    for name in abundances:
        if '|' in name:
            label = name.replace('|', '.')
        elif '.' in name:
            label = name
        else:
            continue  # single-level taxonomy: not a candidate

        ranked.append((float(abundances[name]), label))

    return sorted(ranked, reverse=True)[:xxx]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
352
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
353
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def get_biomarkes(abundant, xxx):
    """
    Split the taxonomy and then look, level by level, when there are at least ``xxx`` distinct branches.
    Return the set of branches as biomarkers to highlight.

    ``abundant`` is a sequence of ``(abundance, taxonomy)`` pairs where the
    taxonomy levels are separated by '.'. Levels are scanned from the root
    downwards; the set of distinct names found at the first level that has
    at least ``xxx`` of them is returned. If no level reaches ``xxx``, the
    set of names at the deepest level is returned. An empty ``abundant``
    yields an empty set.
    """
    lineages = [name.split('.') for _, name in abundant]

    if not lineages:
        return set()

    # Depth of the longest lineage. max() over the lists themselves would
    # compare them lexicographically and pick the wrong one.
    max_depth = max(len(lineage) for lineage in lineages)
    branches = set()

    for lvl in range(max_depth):
        branches = set(lineage[lvl] for lineage in lineages if lvl < len(lineage))

        if len(branches) >= xxx:
            break

    return branches
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
379
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
def scale_clade_size(minn, maxx, abu, max_abu):
    """
    Return a clade size that grows logarithmically with ``abu`` relative to ``max_abu``.

    The value is ``minn + maxx * log(1 + abu / max_abu)``: it equals ``minn``
    when ``abu`` is 0 and ``minn + maxx * log(2)`` when ``abu`` equals
    ``max_abu``. When ``max_abu`` is 0 the ratio is taken as 0, so ``minn``
    is returned instead of raising ZeroDivisionError.
    """
    if max_abu == 0:
        ratio = 0.
    else:
        # float() keeps the division exact for integer inputs under Python 2
        ratio = float(abu) / max_abu

    return minn + maxx * log(1. + ratio)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
385
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
386
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
387 def main():
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
388 """
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
389 """
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
390 colors = [(245., 90., 100.), (125., 80., 80.), (0., 80., 100.), (195., 100., 100.),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
391 (150., 100., 100.), (55., 100., 100.), (280., 80., 88.)] # HSV format
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
392 args = read_params()
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
393 lefse_input = None
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
394 lefse_output = {}
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
395 color = {}
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
396 biomarkers = set()
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
397 taxa = []
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
398 abundances = {}
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
399 max_abundances = None
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
400 max_effect_size = None
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
401 max_log_effect_size = None
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
402 background_list = []
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
403 background_clades = []
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
404 background_colors = {}
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
405 annotations_list = []
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
406 external_annotations_list = []
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
407 lin = False
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
408 lout = False
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
409
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
410 # get the levels that should be shaded
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
411 if args.background_levels:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
412 background_list = [int(i.strip()) for i in args.background_levels.strip().split(',')]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
413
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
414 # get the background_clades
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
415 if args.background_clades:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
416 if get_file_type(args.background_colors) in ['txt']:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
417 with open(args.background_clades, 'r') as f:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
418 background_clades = [str(s.strip()) for s in f]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
419 else: # it's a string in csv format
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
420 background_clades = [str(s.strip()) for s in args.background_clades.split(',')]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
421
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
422 # read the set of colors to use for the background_clades
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
423 if args.background_colors:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
424 col = []
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
425
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
426 if get_file_type(args.background_colors) in ['txt']:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
427 with open(args.background_colors, 'r') as f:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
428 col = [str(s.strip()) for s in f]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
429 else: # it's a string in csv format
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
430 col = [c.strip() for c in args.background_colors.split(',')]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
431
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
432 lst = {}
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
433 i = 0
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
434
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
435 for c in background_clades:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
436 cc = c[:c.find('.')]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
437
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
438 if cc not in lst:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
439 background_colors[c] = col[i % len(col)]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
440 lst[cc] = col[i % len(col)]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
441 i += 1
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
442 else:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
443 background_colors[c] = lst[cc]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
444
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
445 # get the levels that will use the internal annotation
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
446 if args.annotations:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
447 annotations_list = [int(i.strip()) for i in args.annotations.strip().split(',')]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
448
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
449 # get the levels that will use the external legend annotation
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
450 if args.external_annotations:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
451 external_annotations_list = [int(i.strip()) for i in args.external_annotations.strip().split(',')]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
452
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
453 if args.lefse_input:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
454 # if the lefse_input is in biom format, convert it
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
455 if get_file_type(args.lefse_input) in 'biom':
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
456 try:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
457 biom = parse_biom(args.lefse_input, args.discard_otus, args.internal_levels)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
458 lefse_input = DataMatrix(StringIO(biom), args)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
459 except Exception as e:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
460 lin = True
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
461 print e
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
462 else:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
463 if args.internal_levels:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
464 aaa = {}
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
465 header = None
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
466 with open(args.lefse_input, 'r') as f:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
467 for r in f:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
468 if header is None:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
469 header = [s.strip() for s in r.split('\t')]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
470 else:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
471 row = r.split('\t')
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
472 aaa[row[0].strip().replace('|', '.')] = [float(s.strip()) for s in row[1:]]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
473
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
474 feats = add_missing_levels(aaa, summ=False)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
475 ss = '\t'.join(header)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
476 ss += '\n'
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
477 ss += '\n'.join(['\t'.join([str(s) for s in [k] + feats[k]]) for k in feats])
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
478 lefse_input = DataMatrix(StringIO(ss), args)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
479 else:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
480 lefse_input = DataMatrix(args.lefse_input, args)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
481
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
482 if not lin:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
483 taxa = [t.replace('|', '.').strip() for t in lefse_input.get_fnames()] # build taxonomy list
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
484 abundances = dict(lefse_input.get_averages())
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
485 max_abundances = max([abundances[x] for x in abundances])
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
486 else: # no lefse_input provided
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
487 lin = True
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
488
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
489 if args.lefse_output:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
490 # if the lefse_output is in biom format... I don't think it's possible!
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
491 if get_file_type(args.lefse_output) in 'biom':
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
492 lout = True
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
493 print "Seriously?? LEfSe output file is not expected to be in biom format!"
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
494 else:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
495 lst = []
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
496
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
497 with open(args.lefse_output, 'r') as out_file:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
498 for line in out_file:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
499 t, m, bk, es, pv = line.strip().split('\t')
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
500 lefse_output[t] = (es, bk, m, pv)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
501
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
502 # get distinct biomarkers
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
503 if bk:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
504 biomarkers |= set([bk])
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
505
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
506 # get all effect size
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
507 if es:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
508 lst.append(float(es))
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
509
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
510 max_effect_size = max(lst)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
511
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
512 # no lefse_input file provided!
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
513 if (not taxa) and (not abundances): # build taxonomy list and abundances map
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
514 for t in lefse_output:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
515 _, _, m, _ = lefse_output[t]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
516 abundances[t.replace('.', '|')] = float(m)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
517
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
518 max_abundances = max([abundances[x] for x in abundances])
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
519
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
520 for t in lefse_output:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
521 scaled = scale_clade_size(args.min_clade_size, args.max_clade_size,
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
522 abundances[t.replace('.', '|')], max_abundances)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
523
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
524 if scaled >= args.abundance_threshold:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
525 taxa.append(t)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
526 elif not lin: # no lefse_output provided and lefse_input correctly read
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
527 lout = True
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
528
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
529 # find the xxx most abundant
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
530 abundant = get_most_abundant(abundances, args.most_abundant)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
531
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
532 # find the taxonomy level with at least yyy distinct children from the xxx most abundant
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
533 biomarkers = get_biomarkes(abundant, args.least_biomarkers)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
534
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
535 # compose lefse_output variable
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
536 for _, t in abundant:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
537 b = ''
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
538
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
539 for bk in biomarkers:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
540 if bk in t:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
541 b = bk
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
542
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
543 lefse_output[t] = (2., b, '', '')
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
544
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
545 max_effect_size = 1. # it's not going to work
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
546 # no lefse_output and no lefse_input provided
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
547 if lin and lout:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
548 print "You must provide at least one input file!"
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
549 exit(1)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
550
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
551 # write the tree
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
552 with open(args.tree, 'w') as tree_file:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
553 tree_file.write('\n'.join(taxa))
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
554
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
555 # for each biomarker assign it to a different color
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
556 i = 0
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
557
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
558 for bk in biomarkers:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
559 color[bk] = i % len(colors)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
560 i += 1
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
561
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
562 # find max log abs value of effect size
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
563 if lefse_output:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
564 lst = []
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
565
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
566 for t in lefse_output:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
567 es, _, _, _ = lefse_output[t]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
568
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
569 if es:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
570 lst.append(abs(log(float(es) / max_effect_size)))
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
571
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
572 max_log_effect_size = max(lst)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
573
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
574 # write the annotation
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
575 try:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
576 with open(args.annotation, 'w') as annot_file:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
577 # set the title
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
578 if args.title:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
579 annot_file.write('\n'.join(['\t'.join(['title', args.title]),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
580 '\t'.join(['title_font_size', str(args.title_font_size)]), '\n']))
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
581
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
582 # write some basic customizations
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
583 annot_file.write('\n'.join(['\t'.join(['clade_separation', '0.5']),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
584 '\t'.join(['branch_bracket_depth', '0.8']),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
585 '\t'.join(['branch_bracket_width', '0.2']),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
586 '\t'.join(['annotation_legend_font_size', str(args.annotation_legend_font_size)]),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
587 '\t'.join(['class_legend_font_size', '10']),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
588 '\t'.join(['class_legend_marker_size', '1.5']), '\n']))
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
589
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
590 # write the biomarkers' legend
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
591 for bk in biomarkers:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
592 biom = bk.replace('_', ' ').upper()
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
593 rgb = scale_color(colors[color[bk]])
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
594 annot_file.write('\n'.join(['\t'.join([biom, 'annotation', biom]),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
595 '\t'.join([biom, 'clade_marker_color', rgb]),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
596 '\t'.join([biom, 'clade_marker_size', '40']), '\n']))
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
597
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
598 # write the annotation for the tree
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
599 for taxonomy in taxa:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
600 level = taxonomy.count('.') + 1 # which level is this taxonomy?
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
601 clean_taxonomy = taxonomy[taxonomy.rfind('.') + 1:] # retrieve the last level in taxonomy
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
602 cleanest_taxonomy = clean_taxonomy.replace('_', ' ') # substitute '_' with ' '
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
603 scaled = args.def_clade_size
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
604
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
605 # scaled the size of the clade by the average abundance
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
606 if (taxonomy in abundances) or (taxonomy.replace('.', '|') in abundances):
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
607 try:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
608 abu = abundances[taxonomy.replace('.', '|')]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
609 except:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
610 abu = abundances[taxonomy]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
611
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
612 scaled = scale_clade_size(args.min_clade_size, args.max_clade_size, abu, max_abundances)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
613
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
614 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'clade_marker_size', str(scaled)]), '\n']))
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
615
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
616 # put a background annotation to the levels specified by the user
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
617 shaded_background = []
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
618
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
619 for l in background_list:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
620 if level >= l:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
621 lst = [s.strip() for s in taxonomy.strip().split('.')]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
622 t = '.'.join(lst[:l])
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
623
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
624 if t not in shaded_background:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
625 shaded_background.append(t)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
626
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
627 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / l)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
628
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
629 annot_file.write('\n'.join(['\t'.join([t, 'annotation_background_color', args.background_color]),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
630 '\t'.join([t, 'annotation', t.replace('_', ' ')]),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
631 '\t'.join([t, 'annotation_font_size', str(font_size)]), '\n']))
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
632
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
633 # put a background annotation to the clades specified by the user
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
634 for c in background_colors:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
635 bg_color = background_colors[c]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
636
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
637 if not bg_color.startswith('#'):
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
638 bg_color = bg_color.replace('(', '').replace(')', '')
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
639 h, s, v = bg_color.split(';')
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
640 bg_color = scale_color((float(h.strip()) , float(s.strip()), float(v.strip())))
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
641
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
642 # check if the taxonomy has more than one level
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
643 lvls = [str(cc.strip()) for cc in c.split('.')]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
644 done_clades = []
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
645
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
646 for l in lvls:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
647 if (l in taxonomy) and (l not in done_clades):
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
648 lvl = taxonomy[:taxonomy.index(l)].count('.') + 1
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
649 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / lvl)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
650
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
651 annot_file.write('\n'.join(['\t'.join([l, 'annotation_background_color', bg_color]),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
652 '\t'.join([l, 'annotation', l.replace('_', ' ')]),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
653 '\t'.join([l, 'annotation_font_size', str(font_size)]), '\n']))
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
654
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
655 done_clades.append(l)
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
656
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
657 if lefse_output:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
658 if taxonomy in lefse_output:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
659 es, bk, _, _ = lefse_output[taxonomy]
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
660
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
661 # if it is a biomarker then color and label it!
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
662 if bk:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
663 fac = abs(log(float(es) / max_effect_size)) / max_log_effect_size
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
664
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
665 try:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
666 rgbs = scale_color(colors[color[bk]], fac)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
667 except Exception as e:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
668 print e
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
669 print ' '.join(["[W] Assign to", taxonomy, "the default color:", colors[color[bk]]])
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
670 rgbs = colors[color[bk]]
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
671
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
672 annot_file.write(''.join(['\t'.join([clean_taxonomy, 'clade_marker_color', rgbs]), '\n']))
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
673
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
674 # write the annotation only if the abundance is above a given threshold and it is either
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
675 # internal or external annotation lists
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
676 if (scaled >= args.abundance_threshold) and \
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
677 ((level in annotations_list) or (level in external_annotations_list)):
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
678 font_size = args.min_font_size + ((args.max_font_size - args.min_font_size) / level)
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
679 annotation = cleanest_taxonomy if level in annotations_list else '*:' + cleanest_taxonomy
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
680
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
681 annot_file.write('\n'.join(['\t'.join([clean_taxonomy, 'annotation_background_color', rgbs]),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
682 '\t'.join([clean_taxonomy, 'annotation', annotation]),
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
683 '\t'.join([clean_taxonomy, 'annotation_font_size', str(font_size)]), '\n']))
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
684 except Exception as e:
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
685 print e
0
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
686
cac6247cb1d3 graphlan_import
george-weingart
parents:
diff changeset
687
28
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
688 if __name__ == '__main__':
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
689 main()
82fb838d02dc Uploaded updated version of the programs
george-weingart
parents: 0
diff changeset
690