Mercurial > repos > bcclaywell > argo_navis
comparison venv/lib/python2.7/site-packages/yaml/emitter.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d67268158946 |
---|---|
1 | |
2 # Emitter expects events obeying the following grammar: | |
3 # stream ::= STREAM-START document* STREAM-END | |
4 # document ::= DOCUMENT-START node DOCUMENT-END | |
5 # node ::= SCALAR | sequence | mapping | |
6 # sequence ::= SEQUENCE-START node* SEQUENCE-END | |
7 # mapping ::= MAPPING-START (node node)* MAPPING-END | |
8 | |
9 __all__ = ['Emitter', 'EmitterError'] | |
10 | |
11 from error import YAMLError | |
12 from events import * | |
13 | |
class EmitterError(YAMLError):
    """Error raised by the emitter; a subclass of YAMLError."""
16 | |
class ScalarAnalysis(object):
    """Plain record holding the result of analysing one scalar value.

    Every constructor argument is stored unchanged under an attribute of
    the same name.
    """

    def __init__(self, scalar, empty, multiline,
            allow_flow_plain, allow_block_plain,
            allow_single_quoted, allow_double_quoted,
            allow_block):
        # The analysed text and its basic shape.
        self.scalar, self.empty, self.multiline = scalar, empty, multiline
        # Which plain styles may be used for it.
        self.allow_flow_plain, self.allow_block_plain = \
                allow_flow_plain, allow_block_plain
        # Which quoted styles may be used for it.
        self.allow_single_quoted, self.allow_double_quoted = \
                allow_single_quoted, allow_double_quoted
        # Whether a block (literal/folded) style may be used for it.
        self.allow_block = allow_block
30 | |
31 class Emitter(object): | |
32 | |
# Initial tag prefix -> tag handle table.  expect_document_start() copies it
# into self.tag_prefixes at the start of every document.
DEFAULT_TAG_PREFIXES = {
    u'!': u'!',
    u'tag:yaml.org,2002:': u'!!',
}
37 | |
38 def __init__(self, stream, canonical=None, indent=None, width=None, | |
39 allow_unicode=None, line_break=None): | |
40 | |
41 # The stream should have the methods `write` and possibly `flush`. | |
42 self.stream = stream | |
43 | |
44 # Encoding can be overriden by STREAM-START. | |
45 self.encoding = None | |
46 | |
47 # Emitter is a state machine with a stack of states to handle nested | |
48 # structures. | |
49 self.states = [] | |
50 self.state = self.expect_stream_start | |
51 | |
52 # Current event and the event queue. | |
53 self.events = [] | |
54 self.event = None | |
55 | |
56 # The current indentation level and the stack of previous indents. | |
57 self.indents = [] | |
58 self.indent = None | |
59 | |
60 # Flow level. | |
61 self.flow_level = 0 | |
62 | |
63 # Contexts. | |
64 self.root_context = False | |
65 self.sequence_context = False | |
66 self.mapping_context = False | |
67 self.simple_key_context = False | |
68 | |
69 # Characteristics of the last emitted character: | |
70 # - current position. | |
71 # - is it a whitespace? | |
72 # - is it an indention character | |
73 # (indentation space, '-', '?', or ':')? | |
74 self.line = 0 | |
75 self.column = 0 | |
76 self.whitespace = True | |
77 self.indention = True | |
78 | |
79 # Whether the document requires an explicit document indicator | |
80 self.open_ended = False | |
81 | |
82 # Formatting details. | |
83 self.canonical = canonical | |
84 self.allow_unicode = allow_unicode | |
85 self.best_indent = 2 | |
86 if indent and 1 < indent < 10: | |
87 self.best_indent = indent | |
88 self.best_width = 80 | |
89 if width and width > self.best_indent*2: | |
90 self.best_width = width | |
91 self.best_line_break = u'\n' | |
92 if line_break in [u'\r', u'\n', u'\r\n']: | |
93 self.best_line_break = line_break | |
94 | |
95 # Tag prefixes. | |
96 self.tag_prefixes = None | |
97 | |
98 # Prepared anchor and tag. | |
99 self.prepared_anchor = None | |
100 self.prepared_tag = None | |
101 | |
102 # Scalar analysis and style. | |
103 self.analysis = None | |
104 self.style = None | |
105 | |
def dispose(self):
    """Drop the state handlers so the emitter holds no self-references."""
    self.state = None
    self.states = []
110 | |
def emit(self, event):
    """Queue `event` and process queued events while enough are available.

    Each processed event is popped from the front of the queue, exposed as
    self.event while the current state handler runs, then cleared.
    """
    self.events.append(event)
    while True:
        if self.need_more_events():
            break
        self.event = self.events.pop(0)
        self.state()
        self.event = None
117 | |
118 # In some cases, we wait for a few next events before emitting. | |
119 | |
def need_more_events(self):
    """Return True if the queue is too short to process its first event.

    Document, sequence and mapping starts need 1, 2 and 3 following events
    respectively (as judged by need_events); other events need none.
    """
    if not self.events:
        return True
    head = self.events[0]
    lookahead_by_type = (
        (DocumentStartEvent, 1),
        (SequenceStartEvent, 2),
        (MappingStartEvent, 3),
    )
    for event_type, lookahead in lookahead_by_type:
        if isinstance(head, event_type):
            return self.need_events(lookahead)
    return False
132 | |
def need_events(self, count):
    """Return True if fewer than `count` events follow the first queued one.

    Returns False early when the structure opened by the first event is
    already closed in the queue, or a StreamEndEvent is queued.
    """
    depth = 0
    for pending in self.events[1:]:
        if isinstance(pending, (DocumentStartEvent, CollectionStartEvent)):
            depth += 1
        elif isinstance(pending, (DocumentEndEvent, CollectionEndEvent)):
            depth -= 1
        elif isinstance(pending, StreamEndEvent):
            depth = -1
        if depth < 0:
            return False
    return len(self.events) <= count
145 | |
def increase_indent(self, flow=False, indentless=False):
    """Push the current indent and compute the indent for a nested node.

    With no current indent, the new one is best_indent for flow context
    and 0 otherwise.  With a current indent, best_indent is added unless
    `indentless` is set.
    """
    previous = self.indent
    self.indents.append(previous)
    if previous is None:
        self.indent = self.best_indent if flow else 0
    elif not indentless:
        self.indent = previous + self.best_indent
155 | |
156 # States. | |
157 | |
158 # Stream handlers. | |
159 | |
def expect_stream_start(self):
    """Handle the initial StreamStartEvent; raise EmitterError otherwise.

    The event's encoding is adopted only when the stream has no truthy
    `encoding` attribute of its own.
    """
    if not isinstance(self.event, StreamStartEvent):
        raise EmitterError("expected StreamStartEvent, but got %s"
                % self.event)
    stream_encoding = getattr(self.stream, 'encoding', None)
    if self.event.encoding and not stream_encoding:
        self.encoding = self.event.encoding
    self.write_stream_start()
    self.state = self.expect_first_document_start
169 | |
def expect_nothing(self):
    """Always raise EmitterError; installed as the state after stream end."""
    message = "expected nothing, but got %s" % self.event
    raise EmitterError(message)
172 | |
173 # Document handlers. | |
174 | |
def expect_first_document_start(self):
    """Handle the first document start by delegating with first=True."""
    outcome = self.expect_document_start(first=True)
    return outcome
177 | |
def expect_document_start(self, first=False):
    """Handle a DocumentStartEvent or the final StreamEndEvent.

    For a document start: writes the pending '...' terminator if
    directives follow an open-ended document, writes %YAML and %TAG
    directives, resets self.tag_prefixes, and writes '---' unless the
    document start may stay implicit.  For a stream end: finishes the
    stream.  Any other event raises EmitterError.

    first -- True when this is the first document of the stream; only the
             first document may have an implicit start.
    """
    if isinstance(self.event, DocumentStartEvent):
        if (self.event.version or self.event.tags) and self.open_ended:
            self.write_indicator(u'...', True)
            self.write_indent()
        if self.event.version:
            version_text = self.prepare_version(self.event.version)
            self.write_version_directive(version_text)
        self.tag_prefixes = self.DEFAULT_TAG_PREFIXES.copy()
        if self.event.tags:
            # sorted() works whether keys() is a list or a view; calling
            # .sort() on the result of keys() only works for a list.
            for handle in sorted(self.event.tags):
                prefix = self.event.tags[handle]
                self.tag_prefixes[prefix] = handle
                handle_text = self.prepare_tag_handle(handle)
                prefix_text = self.prepare_tag_prefix(prefix)
                self.write_tag_directive(handle_text, prefix_text)
        implicit = (first and not self.event.explicit and not self.canonical
                and not self.event.version and not self.event.tags
                and not self.check_empty_document())
        if not implicit:
            self.write_indent()
            self.write_indicator(u'---', True)
            if self.canonical:
                self.write_indent()
        self.state = self.expect_document_root
    elif isinstance(self.event, StreamEndEvent):
        if self.open_ended:
            self.write_indicator(u'...', True)
            self.write_indent()
        self.write_stream_end()
        self.state = self.expect_nothing
    else:
        raise EmitterError("expected DocumentStartEvent, but got %s"
                % self.event)
214 | |
def expect_document_end(self):
    """Handle a DocumentEndEvent; raise EmitterError for anything else.

    Writes '...' when the event is explicit, flushes the stream and
    returns to expecting a document start.
    """
    if not isinstance(self.event, DocumentEndEvent):
        raise EmitterError("expected DocumentEndEvent, but got %s"
                % self.event)
    self.write_indent()
    if self.event.explicit:
        self.write_indicator(u'...', True)
        self.write_indent()
    self.flush_stream()
    self.state = self.expect_document_start
226 | |
def expect_document_root(self):
    """Emit the root node, returning to expect_document_end afterwards."""
    after_root = self.expect_document_end
    self.states.append(after_root)
    self.expect_node(root=True)
230 | |
231 # Node handlers. | |
232 | |
def expect_node(self, root=False, sequence=False, mapping=False,
        simple_key=False):
    """Record the node's context and dispatch on the current event type.

    Aliases go to expect_alias.  Scalars and collection starts first get
    their anchor and tag written; collections are then emitted in flow
    style when already inside a flow collection, in canonical mode, when
    the event asks for it, or when the collection is empty, and in block
    style otherwise.  Any other event raises EmitterError.
    """
    self.root_context, self.sequence_context = root, sequence
    self.mapping_context, self.simple_key_context = mapping, simple_key

    if isinstance(self.event, AliasEvent):
        self.expect_alias()
        return
    if not isinstance(self.event, (ScalarEvent, CollectionStartEvent)):
        raise EmitterError("expected NodeEvent, but got %s" % self.event)

    self.process_anchor(u'&')
    self.process_tag()
    if isinstance(self.event, ScalarEvent):
        self.expect_scalar()
    elif isinstance(self.event, SequenceStartEvent):
        use_flow = (self.flow_level or self.canonical
                or self.event.flow_style or self.check_empty_sequence())
        if use_flow:
            self.expect_flow_sequence()
        else:
            self.expect_block_sequence()
    elif isinstance(self.event, MappingStartEvent):
        use_flow = (self.flow_level or self.canonical
                or self.event.flow_style or self.check_empty_mapping())
        if use_flow:
            self.expect_flow_mapping()
        else:
            self.expect_block_mapping()
260 | |
def expect_alias(self):
    """Write the alias ('*' + anchor) and return to the previous state.

    Raises EmitterError when the alias event carries no anchor.
    """
    anchor = self.event.anchor
    if anchor is None:
        raise EmitterError("anchor is not specified for alias")
    self.process_anchor(u'*')
    self.state = self.states.pop()
266 | |
def expect_scalar(self):
    """Write the current scalar with a temporarily increased indent."""
    self.increase_indent(flow=True)
    self.process_scalar()
    # Restore the indent first, then return to the enclosing state.
    restored_indent = self.indents.pop()
    self.indent = restored_indent
    next_state = self.states.pop()
    self.state = next_state
272 | |
273 # Flow sequence handlers. | |
274 | |
def expect_flow_sequence(self):
    """Open a flow sequence: write '[' and enter one more flow level."""
    self.write_indicator(u'[', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_sequence_item
280 | |
def expect_first_flow_sequence_item(self):
    """Handle the first item of a flow sequence, or its immediate end."""
    if isinstance(self.event, SequenceEndEvent):
        # Empty sequence: close it straight away.
        self.indent = self.indents.pop()
        self.flow_level = self.flow_level - 1
        self.write_indicator(u']', False)
        self.state = self.states.pop()
        return
    wrap = self.canonical or self.column > self.best_width
    if wrap:
        self.write_indent()
    self.states.append(self.expect_flow_sequence_item)
    self.expect_node(sequence=True)
292 | |
def expect_flow_sequence_item(self):
    """Handle a subsequent flow sequence item, or the end of the sequence."""
    if isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.flow_level = self.flow_level - 1
        if self.canonical:
            # Canonical output puts a trailing comma and a line break
            # before the closing bracket.
            self.write_indicator(u',', False)
            self.write_indent()
        self.write_indicator(u']', False)
        self.state = self.states.pop()
        return
    self.write_indicator(u',', False)
    wrap = self.canonical or self.column > self.best_width
    if wrap:
        self.write_indent()
    self.states.append(self.expect_flow_sequence_item)
    self.expect_node(sequence=True)
308 | |
309 # Flow mapping handlers. | |
310 | |
def expect_flow_mapping(self):
    """Open a flow mapping: write '{' and enter one more flow level."""
    self.write_indicator(u'{', True, whitespace=True)
    self.flow_level = self.flow_level + 1
    self.increase_indent(flow=True)
    self.state = self.expect_first_flow_mapping_key
316 | |
def expect_first_flow_mapping_key(self):
    """Handle the first key of a flow mapping, or its immediate end."""
    if isinstance(self.event, MappingEndEvent):
        # Empty mapping: close it straight away.
        self.indent = self.indents.pop()
        self.flow_level = self.flow_level - 1
        self.write_indicator(u'}', False)
        self.state = self.states.pop()
        return
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    simple = not self.canonical and self.check_simple_key()
    if simple:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
    else:
        # Complex key: introduce it with an explicit '?' indicator.
        self.write_indicator(u'?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
333 | |
def expect_flow_mapping_key(self):
    """Handle a subsequent flow mapping key, or the end of the mapping."""
    if isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.flow_level = self.flow_level - 1
        if self.canonical:
            # Canonical output puts a trailing comma and a line break
            # before the closing brace.
            self.write_indicator(u',', False)
            self.write_indent()
        self.write_indicator(u'}', False)
        self.state = self.states.pop()
        return
    self.write_indicator(u',', False)
    if self.canonical or self.column > self.best_width:
        self.write_indent()
    simple = not self.canonical and self.check_simple_key()
    if simple:
        self.states.append(self.expect_flow_mapping_simple_value)
        self.expect_node(mapping=True, simple_key=True)
    else:
        # Complex key: introduce it with an explicit '?' indicator.
        self.write_indicator(u'?', True)
        self.states.append(self.expect_flow_mapping_value)
        self.expect_node(mapping=True)
354 | |
def expect_flow_mapping_simple_value(self):
    """Write ':' right after a simple flow key, then emit the value."""
    self.write_indicator(u':', False)
    after_value = self.expect_flow_mapping_key
    self.states.append(after_value)
    self.expect_node(mapping=True)
359 | |
def expect_flow_mapping_value(self):
    """Write ':' after a complex flow key, then emit the value.

    A line break is inserted first in canonical mode or when the current
    column is past best_width.
    """
    wrap = self.canonical or self.column > self.best_width
    if wrap:
        self.write_indent()
    self.write_indicator(u':', True)
    after_value = self.expect_flow_mapping_key
    self.states.append(after_value)
    self.expect_node(mapping=True)
366 | |
367 # Block sequence handlers. | |
368 | |
def expect_block_sequence(self):
    """Start a block sequence.

    The sequence is indentless when it is a mapping value and the last
    emitted character was not an indention character.
    """
    skip_indent = (self.mapping_context and not self.indention)
    self.increase_indent(flow=False, indentless=skip_indent)
    self.state = self.expect_first_block_sequence_item
373 | |
def expect_first_block_sequence_item(self):
    """Handle the first block sequence item by delegating with first=True."""
    outcome = self.expect_block_sequence_item(first=True)
    return outcome
376 | |
def expect_block_sequence_item(self, first=False):
    """Emit one '- ' item of a block sequence, or finish the sequence.

    first -- True for the first item; the end-of-sequence check is then
             skipped.
    """
    if not first and isinstance(self.event, SequenceEndEvent):
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    self.write_indicator(u'-', True, indention=True)
    self.states.append(self.expect_block_sequence_item)
    self.expect_node(sequence=True)
386 | |
387 # Block mapping handlers. | |
388 | |
def expect_block_mapping(self):
    """Start a block mapping: increase the indent and expect its first key."""
    first_key_state = self.expect_first_block_mapping_key
    self.increase_indent(flow=False)
    self.state = first_key_state
392 | |
def expect_first_block_mapping_key(self):
    """Handle the first block mapping key by delegating with first=True."""
    outcome = self.expect_block_mapping_key(first=True)
    return outcome
395 | |
def expect_block_mapping_key(self, first=False):
    """Emit one key of a block mapping, or finish the mapping.

    Simple keys are written inline; other keys get an explicit '?'
    indicator.

    first -- True for the first key; the end-of-mapping check is then
             skipped.
    """
    if not first and isinstance(self.event, MappingEndEvent):
        self.indent = self.indents.pop()
        self.state = self.states.pop()
        return
    self.write_indent()
    if not self.check_simple_key():
        self.write_indicator(u'?', True, indention=True)
        self.states.append(self.expect_block_mapping_value)
        self.expect_node(mapping=True)
        return
    self.states.append(self.expect_block_mapping_simple_value)
    self.expect_node(mapping=True, simple_key=True)
409 | |
def expect_block_mapping_simple_value(self):
    """Write ':' right after a simple block key, then emit the value."""
    self.write_indicator(u':', False)
    after_value = self.expect_block_mapping_key
    self.states.append(after_value)
    self.expect_node(mapping=True)
414 | |
def expect_block_mapping_value(self):
    """Write ':' on its own indented line after a complex key, then the value."""
    self.write_indent()
    self.write_indicator(u':', True, indention=True)
    after_value = self.expect_block_mapping_key
    self.states.append(after_value)
    self.expect_node(mapping=True)
420 | |
421 # Checkers. | |
422 | |
def check_empty_sequence(self):
    """Tell whether the current event starts a sequence that ends at once.

    The result is truthy only when self.event is a SequenceStartEvent and
    the next queued event is a SequenceEndEvent.
    """
    if not isinstance(self.event, SequenceStartEvent):
        return False
    if not self.events:
        # Hand back the empty queue itself, exactly as the `and` chain
        # this replaces would.
        return self.events
    return isinstance(self.events[0], SequenceEndEvent)
426 | |
def check_empty_mapping(self):
    """Tell whether the current event starts a mapping that ends at once.

    The result is truthy only when self.event is a MappingStartEvent and
    the next queued event is a MappingEndEvent.
    """
    if not isinstance(self.event, MappingStartEvent):
        return False
    if not self.events:
        # Hand back the empty queue itself, exactly as the `and` chain
        # this replaces would.
        return self.events
    return isinstance(self.events[0], MappingEndEvent)
430 | |
def check_empty_document(self):
    """Tell whether the document being started holds only an empty scalar.

    Truthy only when self.event is a DocumentStartEvent and the next
    queued event is an implicit ScalarEvent with no anchor, no tag and an
    empty value.
    """
    if not (isinstance(self.event, DocumentStartEvent) and self.events):
        return False
    root = self.events[0]
    return (isinstance(root, ScalarEvent) and root.anchor is None
            and root.tag is None and root.implicit and root.value == u'')
437 | |
def check_simple_key(self):
    """Tell whether the current event can be written as a simple key.

    Adds up the lengths of the prepared anchor, the prepared tag and the
    scalar text (preparing and caching each on self as needed).  The key
    is simple when that total is under 128 and the event is an alias, a
    non-empty single-line scalar, or an empty sequence or mapping.
    """
    event = self.event
    total = 0
    if isinstance(event, NodeEvent) and event.anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(event.anchor)
        total += len(self.prepared_anchor)
    has_tag = (isinstance(event, (ScalarEvent, CollectionStartEvent))
            and event.tag is not None)
    if has_tag:
        if self.prepared_tag is None:
            self.prepared_tag = self.prepare_tag(event.tag)
        total += len(self.prepared_tag)
    if isinstance(event, ScalarEvent):
        if self.analysis is None:
            self.analysis = self.analyze_scalar(event.value)
        total += len(self.analysis.scalar)
    return (total < 128 and (isinstance(self.event, AliasEvent)
        or (isinstance(self.event, ScalarEvent)
                and not self.analysis.empty and not self.analysis.multiline)
        or self.check_empty_sequence() or self.check_empty_mapping()))
457 | |
458 # Anchor, Tag, and Scalar processors. | |
459 | |
def process_anchor(self, indicator):
    """Write `indicator` + anchor for the current event, if it has one.

    indicator -- prefix written before the anchor (callers in this file
                 pass u'&' for nodes and u'*' for aliases).

    The cached self.prepared_anchor is always cleared on return.
    """
    anchor = self.event.anchor
    if anchor is not None:
        if self.prepared_anchor is None:
            self.prepared_anchor = self.prepare_anchor(anchor)
        if self.prepared_anchor:
            self.write_indicator(indicator+self.prepared_anchor, True)
    self.prepared_anchor = None
469 | |
def process_tag(self):
    """Write the tag of the current event unless it may be left implicit.

    For scalars the style is chosen first (and cached in self.style),
    because whether the tag can be omitted depends on whether the scalar
    is plain.  Raises EmitterError when a tag is required but missing.
    The cached self.prepared_tag is cleared on return.
    """
    event = self.event
    tag = event.tag
    # Outside canonical mode, or with no tag at all, an implicit tag can
    # be omitted.
    may_omit = (not self.canonical or tag is None)
    if isinstance(event, ScalarEvent):
        if self.style is None:
            self.style = self.choose_scalar_style()
        if may_omit:
            plain = (self.style == '')
            if ((plain and event.implicit[0])
                    or (not plain and event.implicit[1])):
                self.prepared_tag = None
                return
        if event.implicit[0] and tag is None:
            tag = u'!'
            self.prepared_tag = None
    elif may_omit and event.implicit:
        self.prepared_tag = None
        return
    if tag is None:
        raise EmitterError("tag is not specified")
    if self.prepared_tag is None:
        self.prepared_tag = self.prepare_tag(tag)
    if self.prepared_tag:
        self.write_indicator(self.prepared_tag, True)
    self.prepared_tag = None
494 | |
def choose_scalar_style(self):
    """Pick the style for the current scalar event.

    Returns '' (plain), '\\'', '"', '|' or '>'.  The event's requested
    style is honoured when the scalar analysis and the current context
    allow it; double quotes are the fallback.
    """
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    analysis = self.analysis
    requested = self.event.style
    as_key = self.simple_key_context
    in_flow = self.flow_level

    if requested == '"' or self.canonical:
        return '"'
    if not requested and self.event.implicit[0]:
        # Plain style: not for empty or multiline simple keys, and only
        # if the analysis permits it in the current (flow/block) context.
        unfit_key = as_key and (analysis.empty or analysis.multiline)
        if in_flow:
            plain_allowed = analysis.allow_flow_plain
        else:
            plain_allowed = analysis.allow_block_plain
        if not unfit_key and plain_allowed:
            return ''
    if requested and requested in '|>':
        # Block styles: only outside flow collections and simple keys.
        if not in_flow and not as_key and analysis.allow_block:
            return requested
    if not requested or requested == '\'':
        # Single quotes: not for multiline simple keys.
        if (analysis.allow_single_quoted
                and not (as_key and analysis.multiline)):
            return '\''
    return '"'
515 | |
def process_scalar(self):
    """Write the current scalar using the chosen style's writer.

    Analysis and style are computed if not already cached, and both
    caches are cleared afterwards.  Quoted and plain scalars may be split
    across lines unless the scalar is a simple key.
    """
    if self.analysis is None:
        self.analysis = self.analyze_scalar(self.event.value)
    if self.style is None:
        self.style = self.choose_scalar_style()
    text = self.analysis.scalar
    style = self.style
    may_split = (not self.simple_key_context)
    if style == '"':
        self.write_double_quoted(text, may_split)
    elif style == '\'':
        self.write_single_quoted(text, may_split)
    elif style == '>':
        self.write_folded(text)
    elif style == '|':
        self.write_literal(text)
    else:
        self.write_plain(text, may_split)
    self.analysis = None
    self.style = None
537 | |
538 # Analyzers. | |
539 | |
def prepare_version(self, version):
    """Format a (major, minor) pair as u'major.minor'.

    Raises EmitterError unless the major version is 1.
    """
    major, minor = version
    if major == 1:
        return u'%d.%d' % (major, minor)
    raise EmitterError("unsupported YAML version: %d.%d" % (major, minor))
545 | |
def prepare_tag_handle(self, handle):
    """Validate a tag handle and return it unchanged.

    The handle must be non-empty, start and end with '!', and contain
    only ASCII letters, digits, '-' or '_' in between; otherwise
    EmitterError is raised.
    """
    if not handle:
        raise EmitterError("tag handle must not be empty")
    if not (handle[0] == u'!' and handle[-1] == u'!'):
        raise EmitterError("tag handle must start and end with '!': %r"
                % (handle.encode('utf-8')))
    for symbol in handle[1:-1]:
        is_word_char = (u'0' <= symbol <= u'9' or u'A' <= symbol <= u'Z'
                or u'a' <= symbol <= u'z' or symbol in u'-_')
        if not is_word_char:
            raise EmitterError("invalid character %r in the tag handle: %r"
                    % (symbol.encode('utf-8'), handle.encode('utf-8')))
    return handle
558 | |
def prepare_tag_prefix(self, prefix):
    """Return `prefix` with disallowed characters percent-escaped.

    A leading '!' is kept as is.  ASCII letters, digits and the characters
    -;/?!:@&=+$,_.~*'()[] pass through; every other character is replaced
    by the %XX escapes of its UTF-8 bytes.

    Raises EmitterError when `prefix` is empty.
    """
    if not prefix:
        raise EmitterError("tag prefix must not be empty")
    chunks = []
    start = end = 0
    if prefix[0] == u'!':
        end = 1
    while end < len(prefix):
        ch = prefix[end]
        if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z'  \
                or ch in u'-;/?!:@&=+$,_.~*\'()[]':
            end += 1
        else:
            # Flush the run of safe characters collected so far.
            if start < end:
                chunks.append(prefix[start:end])
            start = end = end+1
            # bytearray yields integer byte values whether iterating the
            # encoded string gives characters or ints, so no ord() call
            # is needed and `ch` is not rebound.
            for byte in bytearray(ch.encode('utf-8')):
                chunks.append(u'%%%02X' % byte)
    if start < end:
        chunks.append(prefix[start:end])
    return u''.join(chunks)
581 | |
def prepare_tag(self, tag):
    """Return the text to emit for `tag`.

    u'!' is returned unchanged.  Otherwise the tag is matched against the
    prefixes in self.tag_prefixes (in sorted order, the last match wins)
    and written as handle + escaped suffix; with no matching prefix it is
    written verbatim as !<escaped tag>.  Characters outside the allowed
    set are replaced by the %XX escapes of their UTF-8 bytes.

    Raises EmitterError when `tag` is empty.
    """
    if not tag:
        raise EmitterError("tag must not be empty")
    if tag == u'!':
        return tag
    handle = None
    suffix = tag
    # sorted() works whether keys() is a list or a view; calling .sort()
    # on the result of keys() only works for a list.
    for prefix in sorted(self.tag_prefixes):
        if tag.startswith(prefix)   \
                and (prefix == u'!' or len(prefix) < len(tag)):
            handle = self.tag_prefixes[prefix]
            suffix = tag[len(prefix):]
    chunks = []
    start = end = 0
    while end < len(suffix):
        ch = suffix[end]
        if u'0' <= ch <= u'9' or u'A' <= ch <= u'Z' or u'a' <= ch <= u'z'  \
                or ch in u'-;/?:@&=+$,_.~*\'()[]'   \
                or (ch == u'!' and handle != u'!'):
            end += 1
        else:
            # Flush the run of safe characters collected so far.
            if start < end:
                chunks.append(suffix[start:end])
            start = end = end+1
            # bytearray yields integer byte values whether iterating the
            # encoded string gives characters or ints, so no ord() call
            # is needed and `ch` is not rebound.
            for byte in bytearray(ch.encode('utf-8')):
                chunks.append(u'%%%02X' % byte)
    if start < end:
        chunks.append(suffix[start:end])
    suffix_text = u''.join(chunks)
    if handle:
        return u'%s%s' % (handle, suffix_text)
    else:
        return u'!<%s>' % suffix_text
618 | |
def prepare_anchor(self, anchor):
    """Validate an anchor name and return it unchanged.

    The anchor must be non-empty and contain only ASCII letters, digits,
    '-' or '_'; otherwise EmitterError is raised.
    """
    if not anchor:
        raise EmitterError("anchor must not be empty")
    for symbol in anchor:
        is_word_char = (u'0' <= symbol <= u'9' or u'A' <= symbol <= u'Z'
                or u'a' <= symbol <= u'z' or symbol in u'-_')
        if not is_word_char:
            raise EmitterError("invalid character %r in the anchor: %r"
                    % (symbol.encode('utf-8'), anchor.encode('utf-8')))
    return anchor
628 | |
def analyze_scalar(self, scalar):
    """Inspect `scalar` and return a ScalarAnalysis of the styles it allows.

    The scan records indicator characters, line breaks, characters that
    need escaping (taking self.allow_unicode into account) and
    significant space/break combinations, then derives which of the
    plain, single-quoted, double-quoted and block styles may be used.
    """
    # Empty scalar is a special case.
    if not scalar:
        return ScalarAnalysis(scalar=scalar, empty=True, multiline=False,
                allow_flow_plain=False, allow_block_plain=True,
                allow_single_quoted=True, allow_double_quoted=True,
                allow_block=False)

    whitespace_chars = u'\0 \t\r\n\x85\u2028\u2029'
    break_chars = u'\n\x85\u2028\u2029'
    last_index = len(scalar)-1

    # Indicators and special characters.
    block_indicators = flow_indicators = False
    line_breaks = special_characters = False

    # Important whitespace combinations.
    leading_space = leading_break = False
    trailing_space = trailing_break = False
    break_space = space_break = False

    # Document indicators at the start rule out both plain styles.
    if scalar.startswith(u'---') or scalar.startswith(u'...'):
        block_indicators = flow_indicators = True

    # Whether the previous character was a space / a break.
    previous_space = previous_break = False

    for index, ch in enumerate(scalar):
        # First character, or preceded by a whitespace.
        preceded_by_whitespace = (index == 0
                or scalar[index-1] in whitespace_chars)
        # Last character, or followed by a whitespace.
        followed_by_whitespace = (index >= last_index
                or scalar[index+1] in whitespace_chars)

        # Check for indicators.
        if index == 0:
            # Leading indicators are special characters.
            if ch in u'#,[]{}&*!|>\'\"%@`':
                flow_indicators = block_indicators = True
            if ch in u'?:':
                flow_indicators = True
                if followed_by_whitespace:
                    block_indicators = True
            if ch == u'-' and followed_by_whitespace:
                flow_indicators = block_indicators = True
        else:
            # Some indicators cannot appear within a scalar as well.
            if ch in u',?[]{}':
                flow_indicators = True
            if ch == u':':
                flow_indicators = True
                if followed_by_whitespace:
                    block_indicators = True
            if ch == u'#' and preceded_by_whitespace:
                flow_indicators = block_indicators = True

        # Check for line breaks, special, and unicode characters.
        if ch in break_chars:
            line_breaks = True
        if not (ch == u'\n' or u'\x20' <= ch <= u'\x7E'):
            printable_unicode = ((ch == u'\x85' or u'\xA0' <= ch <= u'\uD7FF'
                    or u'\uE000' <= ch <= u'\uFFFD') and ch != u'\uFEFF')
            # Non-ASCII text is acceptable only if it is printable and
            # unicode output is allowed.
            if not (printable_unicode and self.allow_unicode):
                special_characters = True

        # Detect important whitespace combinations.
        if ch == u' ':
            if index == 0:
                leading_space = True
            if index == last_index:
                trailing_space = True
            if previous_break:
                break_space = True
            previous_space, previous_break = True, False
        elif ch in break_chars:
            if index == 0:
                leading_break = True
            if index == last_index:
                trailing_break = True
            if previous_space:
                space_break = True
            previous_space, previous_break = False, True
        else:
            previous_space = previous_break = False

    # Let's decide what styles are allowed.

    # Both plain styles are ruled out by leading or trailing whitespace,
    # spaces at the start of a line, spaces followed by breaks, special
    # characters and line breaks (multiline plain scalars are never
    # emitted).
    plain_blocked = (leading_space or leading_break
            or trailing_space or trailing_break
            or break_space or space_break or special_characters
            or line_breaks)

    # Flow indicators additionally forbid flow plain scalars; block
    # indicators additionally forbid block plain scalars.
    allow_flow_plain = not (plain_blocked or flow_indicators)
    allow_block_plain = not (plain_blocked or block_indicators)

    # Spaces at the start of a line are only acceptable for block scalars;
    # spaces followed by breaks and special characters are only allowed
    # for double quoted scalars.
    allow_single_quoted = not (break_space or space_break
            or special_characters)

    # Nothing found by the scan forbids double quotes.
    allow_double_quoted = True

    # Trailing spaces are not permitted for block scalars.
    allow_block = not (trailing_space or space_break or special_characters)

    return ScalarAnalysis(scalar=scalar,
            empty=False, multiline=line_breaks,
            allow_flow_plain=allow_flow_plain,
            allow_block_plain=allow_block_plain,
            allow_single_quoted=allow_single_quoted,
            allow_double_quoted=allow_double_quoted,
            allow_block=allow_block)
787 | |
788 # Writers. | |
789 | |
def flush_stream(self):
    """Flush the output stream if it provides a `flush` method."""
    if not hasattr(self.stream, 'flush'):
        return
    self.stream.flush()
793 | |
def write_stream_start(self):
    """Write a byte order mark when the output encoding is a UTF-16 one."""
    encoding = self.encoding
    if not (encoding and encoding.startswith('utf-16')):
        return
    bom = u'\uFEFF'.encode(encoding)
    self.stream.write(bom)
798 | |
def write_stream_end(self):
    """Finish the stream; the only work done here is flushing it."""
    flush = self.flush_stream
    flush()
801 | |
def write_indicator(self, indicator, need_whitespace,
        whitespace=False, indention=False):
    """Write `indicator`, preceded by a space when one is required.

    indicator       -- text to write.
    need_whitespace -- if true and the last character written was not
                       whitespace, a single space is written first.
    whitespace      -- new value of self.whitespace after the write.
    indention       -- self.indention stays set only if this is also set.
    """
    separator_needed = need_whitespace and not self.whitespace
    data = (u' '+indicator) if separator_needed else indicator
    self.whitespace = whitespace
    self.indention = self.indention and indention
    self.column += len(data)
    self.open_ended = False
    if self.encoding:
        data = data.encode(self.encoding)
    self.stream.write(data)
815 | |
def write_indent(self):
    """Move to the current indentation, breaking the line first if needed.

    A line break is written when the last character was not an indention
    character, when the column is already past the indent, or when it is
    exactly at the indent but the last character was not whitespace.
    The line is then padded with spaces up to the indent.
    """
    target = self.indent or 0
    needs_break = (not self.indention or self.column > target
            or (self.column == target and not self.whitespace))
    if needs_break:
        self.write_line_break()
    if self.column >= target:
        return
    self.whitespace = True
    padding = u' '*(target-self.column)
    self.column = target
    if self.encoding:
        padding = padding.encode(self.encoding)
    self.stream.write(padding)
828 | |
def write_line_break(self, data=None):
    """Write a line break and reset the column bookkeeping.

    data -- the break to write; defaults to self.best_line_break.
    """
    line_end = self.best_line_break if data is None else data
    self.whitespace = True
    self.indention = True
    self.line += 1
    self.column = 0
    if self.encoding:
        line_end = line_end.encode(self.encoding)
    self.stream.write(line_end)
839 | |
def write_version_directive(self, version_text):
    """Write a '%YAML <version_text>' directive followed by a line break."""
    directive = u'%%YAML %s' % version_text
    if self.encoding:
        directive = directive.encode(self.encoding)
    self.stream.write(directive)
    self.write_line_break()
846 | |
def write_tag_directive(self, handle_text, prefix_text):
    """Write a '%TAG <handle> <prefix>' directive followed by a line break."""
    directive = u'%%TAG %s %s' % (handle_text, prefix_text)
    if self.encoding:
        directive = directive.encode(self.encoding)
    self.stream.write(directive)
    self.write_line_break()
853 | |
854 # Scalar streams. | |
855 | |
def write_single_quoted(self, text, split=True):
    """Write `text` as a single-quoted scalar.

    Single quotes inside the text are doubled.  Runs of line breaks are
    written as line breaks followed by an indent.  When `split` is true,
    a single interior space may be replaced by a line break plus indent
    once the column has passed best_width.
    """
    def put(chunk):
        # Write a piece of text verbatim, keeping the column up to date.
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    break_chars = u'\n\x85\u2028\u2029'
    length = len(text)

    self.write_indicator(u'\'', True)
    in_spaces = False
    in_breaks = False
    start = 0
    # One extra iteration (ch is None) flushes whatever is still pending.
    for end in range(length+1):
        ch = text[end] if end < length else None
        if in_spaces:
            if ch is None or ch != u' ':
                # A run of spaces just ended.
                foldable = (start+1 == end and self.column > self.best_width
                        and split and start != 0 and end != length)
                if foldable:
                    self.write_indent()
                else:
                    put(text[start:end])
                start = end
        elif in_breaks:
            if ch is None or ch not in break_chars:
                # A run of breaks just ended.  A leading '\n' is written
                # twice.
                if text[start] == u'\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                start = end
        else:
            ordinary_ended = (ch is None or ch in u' \n\x85\u2028\u2029'
                    or ch == u'\'')
            if ordinary_ended and start < end:
                put(text[start:end])
                start = end
        if ch == u'\'':
            # Escape the quote by doubling it.
            put(u'\'\'')
            start = end + 1
        if ch is not None:
            in_spaces = (ch == u' ')
            in_breaks = (ch in break_chars)
    self.write_indicator(u'\'', False)
909 | |
# Characters that have a one-letter escape in a double-quoted scalar, mapped
# to the letter that write_double_quoted() writes after the backslash.
ESCAPE_REPLACEMENTS = {
    u'\x00': u'0',
    u'\a': u'a',
    u'\b': u'b',
    u'\t': u't',
    u'\n': u'n',
    u'\v': u'v',
    u'\f': u'f',
    u'\r': u'r',
    u'\x1b': u'e',
    u'"': u'"',
    u'\\': u'\\',
    u'\x85': u'N',
    u'\xa0': u'_',
    u'\u2028': u'L',
    u'\u2029': u'P',
}
927 | |
def write_double_quoted(self, text, split=True):
    """Write `text` as a double-quoted scalar, escaping where required.

    Characters in the range U+0020..U+007E other than `"` and `\\` are
    copied through in runs.  When `self.allow_unicode` is true, characters
    in U+00A0..U+D7FF and U+E000..U+FFFD are copied too, except U+2028,
    U+2029 and U+FEFF.  Everything else is written as a backslash escape:
    the short form from `ESCAPE_REPLACEMENTS` when one exists, otherwise
    `\\xXX`, `\\uXXXX` or `\\UXXXXXXXX` depending on the code point.

    When `split` is true and the output would pass `best_width`, the scalar
    is continued on a new line: a trailing backslash is written, then
    `write_indent()` is called, and a further backslash is written if the
    continuation starts with a space.
    """
    def emit(chunk):
        # Advance the column by the unencoded length, then write the chunk.
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    self.write_indicator(u'"', True)
    length = len(text)
    run_start = pos = 0
    while pos <= length:
        ch = text[pos] if pos < length else None
        # Decide whether the pending run must be flushed at this position
        # (end of text, or a character that cannot be copied verbatim).
        if ch is None or ch in u'"\\\x85\u2028\u2029\uFEFF':
            flush_here = True
        elif u'\x20' <= ch <= u'\x7E':
            flush_here = False
        else:
            flush_here = not (self.allow_unicode
                              and (u'\xA0' <= ch <= u'\uD7FF'
                                   or u'\uE000' <= ch <= u'\uFFFD'))
        if flush_here:
            if run_start < pos:
                emit(text[run_start:pos])
                run_start = pos
            if ch is not None:
                if ch in self.ESCAPE_REPLACEMENTS:
                    escaped = u'\\'+self.ESCAPE_REPLACEMENTS[ch]
                elif ch <= u'\xFF':
                    escaped = u'\\x%02X' % ord(ch)
                elif ch <= u'\uFFFF':
                    escaped = u'\\u%04X' % ord(ch)
                else:
                    escaped = u'\\U%08X' % ord(ch)
                emit(escaped)
                run_start = pos+1
        # Line folding: only strictly inside the text, either at a space or
        # right after an escape (when no unflushed run is pending).
        if 0 < pos < length-1 and (ch == u' ' or run_start >= pos) \
                and self.column+(pos-run_start) > self.best_width and split:
            chunk = text[run_start:pos]+u'\\'
            if run_start < pos:
                run_start = pos
            emit(chunk)
            self.write_indent()
            self.whitespace = False
            self.indention = False
            # A second backslash is written when the continuation begins
            # with a space.
            if text[run_start] == u' ':
                emit(u'\\')
        pos += 1
    self.write_indicator(u'"', False)
981 | |
def determine_block_hints(self, text):
    """Return the hint string placed after a block scalar indicator.

    The result is built from up to two parts, in this order:

    * `self.best_indent` as text, when `text` starts with a space or a
      line break;
    * `-` when `text` does not end with a line break, or `+` when `text`
      is a single line break or ends with two line breaks.

    Empty text yields an empty string.
    """
    if not text:
        return u''
    line_breaks = u'\n\x85\u2028\u2029'
    hints = u''
    if text[0] == u' ' or text[0] in line_breaks:
        hints += unicode(self.best_indent)
    if text[-1] not in line_breaks:
        return hints + u'-'
    if len(text) == 1 or text[-2] in line_breaks:
        return hints + u'+'
    return hints
992 | |
def write_folded(self, text):
    """Write `text` as a folded block scalar (`>` indicator).

    The header is `>` plus the hints from `determine_block_hints()`; a `+`
    hint also sets `self.open_ended`.  The text is then scanned one
    character past its end (the final step sees `ch = None`) and written as
    runs of line breaks, runs of spaces and runs of other characters.  A
    lone space found past `best_width` is replaced by `write_indent()`.
    """
    line_breaks = u'\n\x85\u2028\u2029'

    def emit(chunk):
        # Advance the column by the unencoded length, then write the chunk.
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    hints = self.determine_block_hints(text)
    self.write_indicator(u'>'+hints, True)
    if hints[-1:] == u'+':
        self.open_ended = True
    self.write_line_break()
    leading_space = True
    in_spaces = False
    in_breaks = True
    length = len(text)
    run_start = pos = 0
    while pos <= length:
        ch = text[pos] if pos < length else None
        if in_breaks:
            # The run of breaks ends at the first non-break (or end of text).
            if ch is None or ch not in line_breaks:
                # One additional break is written when the run starts with
                # '\n' and sits between two lines that do not begin with a
                # space.
                if not leading_space and ch is not None and ch != u' ' \
                        and text[run_start] == u'\n':
                    self.write_line_break()
                leading_space = (ch == u' ')
                for br in text[run_start:pos]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                run_start = pos
        elif in_spaces:
            # The run of spaces ends at the first non-space (or end of text).
            if ch != u' ':
                if run_start+1 == pos and self.column > self.best_width:
                    self.write_indent()
                else:
                    emit(text[run_start:pos])
                run_start = pos
        elif ch is None or ch == u' ' or ch in line_breaks:
            emit(text[run_start:pos])
            # Text that ends without a line break is still terminated by one.
            if ch is None:
                self.write_line_break()
            run_start = pos
        if ch is not None:
            in_breaks = (ch in line_breaks)
            in_spaces = (ch == u' ')
        pos += 1
1046 | |
def write_literal(self, text):
    """Write `text` as a literal block scalar (`|` indicator).

    The header is `|` plus the hints from `determine_block_hints()`; a `+`
    hint also sets `self.open_ended`.  The text is then written line by
    line: each run of line breaks is reproduced break for break, and each
    following line is preceded by `write_indent()`.  Text that does not end
    with a line break is terminated by one.
    """
    line_breaks = u'\n\x85\u2028\u2029'
    hints = self.determine_block_hints(text)
    self.write_indicator(u'|'+hints, True)
    if hints[-1:] == u'+':
        self.open_ended = True
    self.write_line_break()
    in_breaks = True
    length = len(text)
    run_start = pos = 0
    while pos <= length:
        ch = text[pos] if pos < length else None
        if in_breaks:
            # The run of breaks ends at the first non-break (or end of text).
            if ch is None or ch not in line_breaks:
                for br in text[run_start:pos]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                if ch is not None:
                    self.write_indent()
                run_start = pos
        elif ch is None or ch in line_breaks:
            # self.column is not advanced here; a write_line_break() call
            # always follows this write.
            line = text[run_start:pos]
            if self.encoding:
                line = line.encode(self.encoding)
            self.stream.write(line)
            if ch is None:
                self.write_line_break()
            run_start = pos
        if ch is not None:
            in_breaks = (ch in line_breaks)
        pos += 1
1081 | |
def write_plain(self, text, split=True):
    """Write `text` as a plain (unquoted) scalar.

    When `self.root_context` is true, `self.open_ended` is set.  Empty text
    writes nothing further.  Otherwise a separating space is written first
    unless `self.whitespace` is already set, and the text is scanned one
    character past its end (the final step sees `ch = None`) and written as
    runs of spaces, runs of line breaks and runs of other characters.  When
    `split` is true, a lone space found past `best_width` is replaced by
    `write_indent()`.

    Text ending in a line break is handled like any other run of breaks;
    the end-of-text marker is checked before the membership test so that
    case does not raise TypeError.
    """
    if self.root_context:
        self.open_ended = True
    if not text:
        return

    line_breaks = u'\n\x85\u2028\u2029'

    def emit(chunk):
        # Advance the column by the unencoded length, then write the chunk.
        self.column += len(chunk)
        if self.encoding:
            chunk = chunk.encode(self.encoding)
        self.stream.write(chunk)

    if not self.whitespace:
        emit(u' ')
    self.whitespace = False
    self.indention = False
    spaces = False
    breaks = False
    start = end = 0
    while end <= len(text):
        ch = None
        if end < len(text):
            ch = text[end]
        if spaces:
            # The run of spaces ends at the first non-space (or end of text).
            if ch != u' ':
                if start+1 == end and self.column > self.best_width and split:
                    self.write_indent()
                    # write_indent() output is followed directly by scalar
                    # content, so clear the flags again.
                    self.whitespace = False
                    self.indention = False
                else:
                    emit(text[start:end])
                start = end
        elif breaks:
            # `ch` is None at end of text; `None not in <string>` raises
            # TypeError, so test for it first (as the other scalar writers do).
            if ch is None or ch not in line_breaks:
                # A run that begins with '\n' gets one additional break.
                if text[start] == u'\n':
                    self.write_line_break()
                for br in text[start:end]:
                    if br == u'\n':
                        self.write_line_break()
                    else:
                        self.write_line_break(br)
                self.write_indent()
                self.whitespace = False
                self.indention = False
                start = end
        else:
            if ch is None or ch == u' ' or ch in line_breaks:
                emit(text[start:end])
                start = end
        if ch is not None:
            spaces = (ch == u' ')
            breaks = (ch in line_breaks)
        end += 1
1140 |