Mercurial > repos > bcclaywell > argo_navis
comparison venv/lib/python2.7/site-packages/docutils/transforms/frontmatter.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author | bcclaywell |
---|---|
date | Mon, 12 Oct 2015 17:43:33 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d67268158946 |
---|---|
1 # $Id: frontmatter.py 7595 2013-01-21 17:33:56Z milde $ | |
2 # Author: David Goodger, Ueli Schlaepfer <goodger@python.org> | |
3 # Copyright: This module has been placed in the public domain. | |
4 | |
5 """ | |
6 Transforms related to the front matter of a document or a section | |
7 (information found before the main text): | |
8 | |
9 - `DocTitle`: Used to transform a lone top level section's title to | |
10 the document title, promote a remaining lone top-level section's | |
11 title to the document subtitle, and determine the document's title | |
12 metadata (document['title']) based on the document title and/or the | |
13 "title" setting. | |
14 | |
15 - `SectionSubTitle`: Used to transform a lone subsection into a | |
16 subtitle. | |
17 | |
18 - `DocInfo`: Used to transform a bibliographic field list into docinfo | |
19 elements. | |
20 """ | |
21 | |
22 __docformat__ = 'reStructuredText' | |
23 | |
24 import re | |
25 from docutils import nodes, utils | |
26 from docutils.transforms import TransformError, Transform | |
27 | |
28 | |
class TitlePromoter(Transform):

    """
    Abstract base class for DocTitle and SectionSubTitle transforms.

    Supplies the shared tree manipulation used by both subclasses:
    `promote_title` lifts a lone section's title (and contents) into its
    parent, `promote_subtitle` turns a lone subsection's title into a
    ``<subtitle>`` element, and `candidate_index` locates the section
    eligible for promotion.
    """

    def promote_title(self, node):
        """
        Transform the following tree::

            <node>
                <section>
                    <title>
                    ...

        into ::

            <node>
                <title>
                ...

        `node` is normally a document.

        Return 1 if a title was promoted, None if `node` has no
        promotion candidate (see `candidate_index`).

        Raise TypeError if `node` is not a `nodes.Element` instance.
        """
        # Type check
        if not isinstance(node, nodes.Element):
            # Call form of ``raise``: valid on both Python 2 and Python 3
            # (the ``raise Class, 'message'`` statement form is Python 2 only).
            raise TypeError('node must be of Element-derived type.')

        # `node` must not have a title yet.
        assert not (len(node) and isinstance(node[0], nodes.title))
        section, index = self.candidate_index(node)
        if index is None:
            return None

        # Transfer the section's attributes to the node:
        # NOTE: Change second parameter to False to NOT replace
        #       attributes that already exist in node with those in
        #       section
        # NOTE: Remove third parameter to NOT copy the 'source'
        #       attribute from section
        node.update_all_atts_concatenating(section, True, True)

        # setup_child is called automatically for all nodes.
        node[:] = (section[:1]        # section title
                   + node[:index]     # everything that was in the
                                      # node before the section
                   + section[1:])     # everything that was in the section
        assert isinstance(node[0], nodes.title)
        return 1

    def promote_subtitle(self, node):
        """
        Transform the following node tree::

            <node>
                <title>
                <section>
                    <title>
                    ...

        into ::

            <node>
                <title>
                <subtitle>
                ...

        Return 1 if a subtitle was promoted, None if `node` has no
        promotion candidate (see `candidate_index`).

        Raise TypeError if `node` is not a `nodes.Element` instance.
        """
        # Type check
        if not isinstance(node, nodes.Element):
            # Call form of ``raise``: valid on both Python 2 and Python 3.
            raise TypeError('node must be of Element-derived type.')

        subsection, index = self.candidate_index(node)
        if index is None:
            return None
        subtitle = nodes.subtitle()

        # Transfer the subsection's attributes to the new subtitle
        # NOTE: Change second parameter to False to NOT replace
        #       attributes that already exist in node with those in
        #       section
        # NOTE: Remove third parameter to NOT copy the 'source'
        #       attribute from section
        subtitle.update_all_atts_concatenating(subsection, True, True)

        # Transfer the contents of the subsection's title to the
        # subtitle:
        subtitle[:] = subsection[0][:]
        node[:] = (node[:1]       # title
                   + [subtitle]
                   # everything that was before the section:
                   + node[1:index]
                   # everything that was in the subsection:
                   + subsection[1:])
        return 1

    def candidate_index(self, node):
        """
        Find and return the promotion candidate and its index.

        The candidate is the child at the index reported by
        ``node.first_child_not_matching_class(nodes.PreBibliographic)``.
        It is accepted only if it is a `nodes.section` and no further
        children follow it in `node`.

        Return (None, None) if no valid candidate was found.
        """
        index = node.first_child_not_matching_class(
            nodes.PreBibliographic)
        if index is None or len(node) > (index + 1) or \
               not isinstance(node[index], nodes.section):
            return None, None
        else:
            return node[index], index
136 | |
137 | |
class DocTitle(TitlePromoter):

    """
    reStructuredText_ has no explicit syntax for a document title or
    subtitle.  They are supplied implicitly instead, and this two-step
    transform "raises" or "promotes" the title(s), together with the
    contents of their sections, to the document level.

    1. If the document contains a single top-level section as its
       first non-comment element, the top-level section's title
       becomes the document's title, and the top-level section's
       contents become the document's immediate contents. The lone
       top-level section header must be the first non-comment element
       in the document.

       For example, take this input text::

           =================
            Top-Level Title
           =================

           A paragraph.

       Once parsed, it looks like this::

           <document>
               <section names="top-level title">
                   <title>
                       Top-Level Title
                   <paragraph>
                       A paragraph.

       After running the DocTitle transform, we have::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <paragraph>
                   A paragraph.

    2. If step 1 successfully determines the document title, we
       continue by checking for a subtitle.

       If the lone top-level section itself contains a single
       second-level section as its first non-comment element, that
       section's title is promoted to the document's subtitle, and
       that section's contents become the document's immediate
       contents. Given this input text::

           =================
            Top-Level Title
           =================

           Second-Level Title
           ~~~~~~~~~~~~~~~~~~

           A paragraph.

       After parsing and running the Section Promotion transform, the
       result is::

           <document names="top-level title">
               <title>
                   Top-Level Title
               <subtitle names="second-level title">
                   Second-Level Title
               <paragraph>
                   A paragraph.

       (Note that the implicit hyperlink target generated by the
       "Second-Level Title" is preserved on the "subtitle" element
       itself.)

    Any comment elements occurring before the document title or
    subtitle are accumulated and inserted as the first body elements
    after the title(s).

    This transform also sets the document's metadata title
    (document['title']).

    .. _reStructuredText: http://docutils.sf.net/rst.html
    """

    default_priority = 320

    def set_metadata(self):
        """
        Fill in the document['title'] metadata title.

        Sources, highest priority first:

        * An already present document['title'] attribute (left as is).
        * The "title" setting, when it is not None.
        * The text of the document's leading title node (as promoted
          by promote_title).
        """
        document = self.document
        if document.hasattr('title'):
            # An explicit metadata title always wins; nothing to do.
            return
        configured = document.settings.title
        if configured is not None:
            document['title'] = configured
        elif len(document) and isinstance(document[0], nodes.title):
            document['title'] = document[0].astext()

    def apply(self):
        """
        Promote the title and, if that succeeded, the subtitle; then
        set the metadata title.

        Promotion is skipped when the "doctitle_xform" setting is
        present and false; the metadata title is set either way.
        """
        document = self.document
        enabled = getattr(document.settings, 'doctitle_xform', 1)
        # promote_(sub)title are inherited from TitlePromoter.  A subtitle
        # is only looked for once a title has actually been promoted.
        if enabled and self.promote_title(document):
            self.promote_subtitle(document)
        # Set document['title'].
        self.set_metadata()
248 | |
249 | |
class SectionSubTitle(TitlePromoter):

    """
    The section-level counterpart of document subtitles. For example, ::

        <section>
            <title>
                Title
            <section>
                <title>
                    Subtitle
                ...

    is transformed into ::

        <section>
            <title>
                Title
            <subtitle>
                Subtitle
            ...

    For details refer to the docstring of DocTitle.
    """

    default_priority = 350

    def apply(self):
        """
        Try to promote a subtitle in every section of the document.

        Does nothing when the "sectsubtitle_xform" setting is present
        and false.
        """
        settings = self.document.settings
        if getattr(settings, 'sectsubtitle_xform', 1):
            for section in self.document.traverse(nodes.section):
                # Known wart: sections get deleted while we walk the
                # node tree, yet self.promote_subtitle is still called
                # for them.  To do: Write a test case which shows the
                # problem and discuss on Docutils-develop.
                self.promote_subtitle(section)
286 | |
287 | |
class DocInfo(Transform):

    """
    This transform is specific to the reStructuredText_ markup syntax;
    see "Bibliographic Fields" in the `reStructuredText Markup
    Specification`_ for a high-level description. This transform
    should be run *after* the `DocTitle` transform.

    Given a field list as the first non-comment element after the
    document title and subtitle (if present), registered bibliographic
    field names are transformed to the corresponding DTD elements,
    becoming child elements of the "docinfo" element (except for a
    dedication and/or an abstract, which become "topic" elements after
    "docinfo").

    For example, given this document fragment after parsing::

        <document>
            <title>
                Document Title
            <field_list>
                <field>
                    <field_name>
                        Author
                    <field_body>
                        <paragraph>
                            A. Name
                <field>
                    <field_name>
                        Status
                    <field_body>
                        <paragraph>
                            $RCSfile$
            ...

    After running the bibliographic field list transform, the
    resulting document tree would look like this::

        <document>
            <title>
                Document Title
            <docinfo>
                <author>
                    A. Name
                <status>
                    frontmatter.py
            ...

    The "Status" field contained an expanded RCS keyword, which is
    normally (but optionally) cleaned up by the transform. The sole
    contents of the field body must be a paragraph containing an
    expanded RCS keyword of the form "$keyword: expansion text $". Any
    RCS keyword can be processed in any bibliographic field. The
    dollar signs and leading RCS keyword name are removed. Extra
    processing is done for the following RCS keywords:

    - "RCSfile" expands to the name of the file in the RCS or CVS
      repository, which is the name of the source file with a ",v"
      suffix appended. The transform will remove the ",v" suffix.

    - "Date" expands to the format "YYYY/MM/DD hh:mm:ss" (in the UTC
      time zone). The RCS Keywords transform will extract just the
      date itself and transform it to an ISO 8601 format date, as in
      "2000-12-31".

      (Since the source file for this text is itself stored under CVS,
      we can't show an example of the "Date" RCS keyword because we
      can't prevent any RCS keywords used in this explanation from
      being expanded. Only the "RCSfile" keyword is stable; its
      expansion text changes only if the file name changes.)

    .. _reStructuredText: http://docutils.sf.net/rst.html
    .. _reStructuredText Markup Specification:
       http://docutils.sf.net/docs/ref/rst/restructuredtext.html
    """

    default_priority = 340

    biblio_nodes = {
          'author': nodes.author,
          'authors': nodes.authors,
          'organization': nodes.organization,
          'address': nodes.address,
          'contact': nodes.contact,
          'version': nodes.version,
          'revision': nodes.revision,
          'status': nodes.status,
          'date': nodes.date,
          'copyright': nodes.copyright,
          'dedication': nodes.topic,
          'abstract': nodes.topic}
    """Canonical field name (lowcased) to node class name mapping for
    bibliographic fields (field_list)."""

    def apply(self):
        """
        Replace a leading bibliographic field list with docinfo nodes.

        Does nothing when the "docinfo_xform" setting is present and
        false, when the document has no child that is not
        `nodes.PreBibliographic`, or when the first such child is not
        a `nodes.field_list`.
        """
        if not getattr(self.document.settings, 'docinfo_xform', 1):
            return
        document = self.document
        index = document.first_child_not_matching_class(
              nodes.PreBibliographic)
        if index is None:
            return
        candidate = document[index]
        if isinstance(candidate, nodes.field_list):
            # Insertion point for the extracted nodes: the first child
            # that is neither Titular nor Decorative.
            biblioindex = document.first_child_not_matching_class(
                  (nodes.Titular, nodes.Decorative))
            nodelist = self.extract_bibliographic(candidate)
            del document[index]         # untransformed field list (candidate)
            document[biblioindex:biblioindex] = nodelist

    def extract_bibliographic(self, field_list):
        """
        Convert the fields of `field_list` into bibliographic nodes.

        Each field whose normalized name is a registered bibliographic
        field is converted to the node class given by `biblio_nodes`.
        Fields that cannot be converted (unknown name, wrong shape,
        empty or compound body, duplicate dedication/abstract) are
        appended to the docinfo element as plain fields instead.

        Return a list holding the docinfo element (only if it has
        children), followed by the "dedication" and "abstract" topics
        (in that order) when present.
        """
        docinfo = nodes.docinfo()
        bibliofields = self.language.bibliographic_fields
        labels = self.language.labels
        topics = {'dedication': None, 'abstract': None}
        for field in field_list:
            # TransformError is used as local control flow: raising it
            # anywhere below routes the field to the fallback branch.
            try:
                name = field[0][0].astext()
                normedname = nodes.fully_normalize_name(name)
                if not (len(field) == 2 and normedname in bibliofields
                        and self.check_empty_biblio_field(field, name)):
                    raise TransformError
                canonical = bibliofields[normedname]
                biblioclass = self.biblio_nodes[canonical]
                if issubclass(biblioclass, nodes.TextElement):
                    if not self.check_compound_biblio_field(field, name):
                        raise TransformError
                    utils.clean_rcs_keywords(
                          field[1][0], self.rcs_keyword_substitutions)
                    docinfo.append(biblioclass('', '', *field[1][0]))
                elif issubclass(biblioclass, nodes.authors):
                    self.extract_authors(field, name, docinfo)
                elif issubclass(biblioclass, nodes.topic):
                    if topics[canonical]:
                        field[-1] += self.document.reporter.warning(
                            'There can only be one "%s" field.' % name,
                            base_node=field)
                        raise TransformError
                    title = nodes.title(name, labels[canonical])
                    topics[canonical] = biblioclass(
                        '', title, classes=[canonical], *field[1].children)
                else:
                    docinfo.append(biblioclass('', *field[1].children))
            except TransformError:
                # Fallback: keep the field as a generic docinfo field,
                # still cleaning RCS keywords from a lone paragraph body.
                if len(field[-1]) == 1 \
                       and isinstance(field[-1][0], nodes.paragraph):
                    utils.clean_rcs_keywords(
                        field[-1][0], self.rcs_keyword_substitutions)
                docinfo.append(field)
        nodelist = []
        if len(docinfo) != 0:
            nodelist.append(docinfo)
        for name in ('dedication', 'abstract'):
            if topics[name]:
                nodelist.append(topics[name])
        return nodelist

    def check_empty_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` has at least one child.

        Otherwise append a warning to the field body and return None.
        """
        if len(field[-1]) < 1:
            field[-1] += self.document.reporter.warning(
                  'Cannot extract empty bibliographic field "%s".' % name,
                  base_node=field)
            return None
        return 1

    def check_compound_biblio_field(self, field, name):
        """
        Return 1 if the body of `field` is exactly one paragraph.

        Otherwise (more than one child, or a sole child that is not a
        paragraph) append a warning to the field body and return None.
        """
        if len(field[-1]) > 1:
            field[-1] += self.document.reporter.warning(
                  'Cannot extract compound bibliographic field "%s".' % name,
                  base_node=field)
            return None
        if not isinstance(field[-1][0], nodes.paragraph):
            field[-1] += self.document.reporter.warning(
                  'Cannot extract bibliographic field "%s" containing '
                  'anything other than a single paragraph.' % name,
                  base_node=field)
            return None
        return 1

    # (pattern, replacement) pairs handed to utils.clean_rcs_keywords:
    # "Date" is reduced to YYYY-MM-DD, "RCSfile" loses its ",v" suffix,
    # and any other "$keyword: text $" is reduced to its text.
    rcs_keyword_substitutions = [
          (re.compile(r'\$' r'Date: (\d\d\d\d)[-/](\d\d)[-/](\d\d)[ T][\d:]+'
                      r'[^$]* \$', re.IGNORECASE), r'\1-\2-\3'),
          (re.compile(r'\$' r'RCSfile: (.+),v \$', re.IGNORECASE), r'\1'),
          (re.compile(r'\$[a-zA-Z]+: (.+) \$'), r'\1'),]

    def extract_authors(self, field, name, docinfo):
        """
        Append an "authors" element built from `field` to `docinfo`.

        The field body may be a single paragraph, a single bullet
        list, or several paragraphs.  If no author can be extracted, a
        warning is appended to the field body and the TransformError
        is re-raised for the caller to handle.
        """
        try:
            if len(field[1]) == 1:
                if isinstance(field[1][0], nodes.paragraph):
                    authors = self.authors_from_one_paragraph(field)
                elif isinstance(field[1][0], nodes.bullet_list):
                    authors = self.authors_from_bullet_list(field)
                else:
                    raise TransformError
            else:
                authors = self.authors_from_paragraphs(field)
            authornodes = [nodes.author('', '', *author)
                           for author in authors if author]
            if len(authornodes) >= 1:
                docinfo.append(nodes.authors('', *authornodes))
            else:
                raise TransformError
        except TransformError:
            field[-1] += self.document.reporter.warning(
                  'Bibliographic field "%s" incompatible with extraction: '
                  'it must contain either a single paragraph (with authors '
                  'separated by one of "%s"), multiple paragraphs (one per '
                  'author), or a bullet list with one paragraph (one author) '
                  'per item.'
                  % (name, ''.join(self.language.author_separators)),
                  base_node=field)
            raise

    def authors_from_one_paragraph(self, field):
        """
        Return one single-Text-node list per author named in the
        paragraph.

        The paragraph text is split on the first entry of
        ``self.language.author_separators`` that yields more than one
        piece; if none does, the whole text is one author.

        Raise TransformError if the paragraph text is empty.
        """
        text = field[1][0].astext().strip()
        if not text:
            raise TransformError
        # Start from "one author" so `authornames` is bound even when the
        # language module defines no author separators at all.
        authornames = [text]
        for authorsep in self.language.author_separators:
            authornames = text.split(authorsep)
            if len(authornames) > 1:
                break
        authornames = [author.strip() for author in authornames]
        authors = [[nodes.Text(author)] for author in authornames if author]
        return authors

    def authors_from_bullet_list(self, field):
        """
        Return the children of each list item's paragraph, one entry
        per item.

        Raise TransformError if any item is not exactly one paragraph,
        or if the list has no items.
        """
        authors = []
        for item in field[1][0]:
            if len(item) != 1 or not isinstance(item[0], nodes.paragraph):
                raise TransformError
            authors.append(item[0].children)
        if not authors:
            raise TransformError
        return authors

    def authors_from_paragraphs(self, field):
        """
        Return the children of each paragraph in the field body, one
        entry per paragraph.

        Raise TransformError if any body element is not a paragraph.
        """
        for item in field[1]:
            if not isinstance(item, nodes.paragraph):
                raise TransformError
        authors = [item.children for item in field[1]]
        return authors