Mercurial > repos > yating-l > jbrowsearchivecreator
changeset 5:32a5addbbae8 draft
planemo upload for repository https://github.com/Yating-L/jbrowse-archive-creator.git commit d583ac16a6c6942730ea536eb59cc37941816030-dirty
| author | yating-l | 
|---|---|
| date | Thu, 18 May 2017 19:13:19 -0400 | 
| parents | b454a3fba174 | 
| children | f9760de55888 | 
| files | test/bin/python test/include/python2.7/Python-ast.h test/include/python2.7/Python.h test/include/python2.7/abstract.h test/include/python2.7/asdl.h test/include/python2.7/ast.h test/include/python2.7/bitset.h test/include/python2.7/boolobject.h test/include/python2.7/bufferobject.h test/include/python2.7/bytearrayobject.h test/include/python2.7/bytes_methods.h test/include/python2.7/bytesobject.h test/include/python2.7/cStringIO.h test/include/python2.7/cellobject.h test/include/python2.7/ceval.h test/include/python2.7/classobject.h test/include/python2.7/cobject.h test/include/python2.7/code.h test/include/python2.7/codecs.h test/include/python2.7/compile.h test/include/python2.7/complexobject.h test/include/python2.7/datetime.h test/include/python2.7/descrobject.h test/include/python2.7/dictobject.h test/include/python2.7/dtoa.h test/include/python2.7/enumobject.h test/include/python2.7/errcode.h test/include/python2.7/eval.h test/include/python2.7/fileobject.h test/include/python2.7/floatobject.h test/include/python2.7/frameobject.h test/include/python2.7/funcobject.h test/include/python2.7/genobject.h test/include/python2.7/graminit.h test/include/python2.7/grammar.h test/include/python2.7/import.h test/include/python2.7/intobject.h test/include/python2.7/intrcheck.h test/include/python2.7/iterobject.h test/include/python2.7/listobject.h test/include/python2.7/longintrepr.h test/include/python2.7/longobject.h test/include/python2.7/marshal.h test/include/python2.7/memoryobject.h test/include/python2.7/metagrammar.h test/include/python2.7/methodobject.h test/include/python2.7/modsupport.h test/include/python2.7/moduleobject.h test/include/python2.7/node.h test/include/python2.7/object.h test/include/python2.7/objimpl.h test/include/python2.7/opcode.h test/include/python2.7/osdefs.h test/include/python2.7/parsetok.h test/include/python2.7/patchlevel.h test/include/python2.7/pgen.h test/include/python2.7/pgenheaders.h test/include/python2.7/py_curses.h test/include/python2.7/pyarena.h test/include/python2.7/pycapsule.h test/include/python2.7/pyconfig.h test/include/python2.7/pyctype.h test/include/python2.7/pydebug.h test/include/python2.7/pyerrors.h test/include/python2.7/pyexpat.h test/include/python2.7/pyfpe.h test/include/python2.7/pygetopt.h test/include/python2.7/pymacconfig.h test/include/python2.7/pymactoolbox.h test/include/python2.7/pymath.h test/include/python2.7/pymem.h test/include/python2.7/pyport.h test/include/python2.7/pystate.h test/include/python2.7/pystrcmp.h test/include/python2.7/pystrtod.h test/include/python2.7/pythonrun.h test/include/python2.7/pythread.h test/include/python2.7/rangeobject.h test/include/python2.7/setobject.h test/include/python2.7/sliceobject.h test/include/python2.7/stringobject.h test/include/python2.7/structmember.h test/include/python2.7/structseq.h test/include/python2.7/symtable.h test/include/python2.7/sysmodule.h test/include/python2.7/timefuncs.h test/include/python2.7/token.h test/include/python2.7/traceback.h test/include/python2.7/tupleobject.h test/include/python2.7/ucnhash.h test/include/python2.7/unicodeobject.h test/include/python2.7/warnings.h test/include/python2.7/weakrefobject.h test/lib/python2.7/UserDict.py test/lib/python2.7/UserDict.pyc test/lib/python2.7/_abcoll.py test/lib/python2.7/_abcoll.pyc test/lib/python2.7/_weakrefset.py test/lib/python2.7/_weakrefset.pyc test/lib/python2.7/abc.py test/lib/python2.7/abc.pyc test/lib/python2.7/codecs.py test/lib/python2.7/codecs.pyc test/lib/python2.7/copy_reg.py test/lib/python2.7/copy_reg.pyc test/lib/python2.7/distutils/__init__.py test/lib/python2.7/distutils/__init__.pyc test/lib/python2.7/distutils/distutils.cfg test/lib/python2.7/encodings/__init__.py test/lib/python2.7/encodings/__init__.pyc test/lib/python2.7/encodings/aliases.py test/lib/python2.7/encodings/aliases.pyc test/lib/python2.7/encodings/ascii.py test/lib/python2.7/encodings/ascii.pyc test/lib/python2.7/encodings/base64_codec.py test/lib/python2.7/encodings/base64_codec.pyc test/lib/python2.7/encodings/big5.py test/lib/python2.7/encodings/big5.pyc test/lib/python2.7/encodings/big5hkscs.py test/lib/python2.7/encodings/big5hkscs.pyc test/lib/python2.7/encodings/bz2_codec.py test/lib/python2.7/encodings/bz2_codec.pyc test/lib/python2.7/encodings/charmap.py test/lib/python2.7/encodings/charmap.pyc test/lib/python2.7/encodings/cp037.py test/lib/python2.7/encodings/cp037.pyc test/lib/python2.7/encodings/cp1006.py test/lib/python2.7/encodings/cp1006.pyc test/lib/python2.7/encodings/cp1026.py test/lib/python2.7/encodings/cp1026.pyc test/lib/python2.7/encodings/cp1140.py test/lib/python2.7/encodings/cp1140.pyc test/lib/python2.7/encodings/cp1250.py test/lib/python2.7/encodings/cp1250.pyc test/lib/python2.7/encodings/cp1251.py test/lib/python2.7/encodings/cp1251.pyc test/lib/python2.7/encodings/cp1252.py test/lib/python2.7/encodings/cp1252.pyc test/lib/python2.7/encodings/cp1253.py test/lib/python2.7/encodings/cp1253.pyc test/lib/python2.7/encodings/cp1254.py test/lib/python2.7/encodings/cp1254.pyc test/lib/python2.7/encodings/cp1255.py test/lib/python2.7/encodings/cp1255.pyc test/lib/python2.7/encodings/cp1256.py test/lib/python2.7/encodings/cp1256.pyc test/lib/python2.7/encodings/cp1257.py test/lib/python2.7/encodings/cp1257.pyc test/lib/python2.7/encodings/cp1258.py test/lib/python2.7/encodings/cp1258.pyc test/lib/python2.7/encodings/cp424.py test/lib/python2.7/encodings/cp424.pyc test/lib/python2.7/encodings/cp437.py test/lib/python2.7/encodings/cp437.pyc test/lib/python2.7/encodings/cp500.py test/lib/python2.7/encodings/cp500.pyc test/lib/python2.7/encodings/cp720.py test/lib/python2.7/encodings/cp720.pyc test/lib/python2.7/encodings/cp737.py test/lib/python2.7/encodings/cp737.pyc test/lib/python2.7/encodings/cp775.py test/lib/python2.7/encodings/cp775.pyc test/lib/python2.7/encodings/cp850.py test/lib/python2.7/encodings/cp850.pyc test/lib/python2.7/encodings/cp852.py test/lib/python2.7/encodings/cp852.pyc test/lib/python2.7/encodings/cp855.py test/lib/python2.7/encodings/cp855.pyc test/lib/python2.7/encodings/cp856.py test/lib/python2.7/encodings/cp856.pyc test/lib/python2.7/encodings/cp857.py test/lib/python2.7/encodings/cp857.pyc test/lib/python2.7/encodings/cp858.py test/lib/python2.7/encodings/cp858.pyc test/lib/python2.7/encodings/cp860.py test/lib/python2.7/encodings/cp860.pyc test/lib/python2.7/encodings/cp861.py test/lib/python2.7/encodings/cp861.pyc test/lib/python2.7/encodings/cp862.py test/lib/python2.7/encodings/cp862.pyc test/lib/python2.7/encodings/cp863.py test/lib/python2.7/encodings/cp863.pyc test/lib/python2.7/encodings/cp864.py test/lib/python2.7/encodings/cp864.pyc test/lib/python2.7/encodings/cp865.py test/lib/python2.7/encodings/cp865.pyc test/lib/python2.7/encodings/cp866.py test/lib/python2.7/encodings/cp866.pyc test/lib/python2.7/encodings/cp869.py test/lib/python2.7/encodings/cp869.pyc test/lib/python2.7/encodings/cp874.py test/lib/python2.7/encodings/cp874.pyc test/lib/python2.7/encodings/cp875.py test/lib/python2.7/encodings/cp875.pyc test/lib/python2.7/encodings/cp932.py test/lib/python2.7/encodings/cp932.pyc test/lib/python2.7/encodings/cp949.py test/lib/python2.7/encodings/cp949.pyc test/lib/python2.7/encodings/cp950.py test/lib/python2.7/encodings/cp950.pyc test/lib/python2.7/encodings/euc_jis_2004.py test/lib/python2.7/encodings/euc_jis_2004.pyc test/lib/python2.7/encodings/euc_jisx0213.py test/lib/python2.7/encodings/euc_jisx0213.pyc test/lib/python2.7/encodings/euc_jp.py test/lib/python2.7/encodings/euc_jp.pyc test/lib/python2.7/encodings/euc_kr.py test/lib/python2.7/encodings/euc_kr.pyc test/lib/python2.7/encodings/gb18030.py test/lib/python2.7/encodings/gb18030.pyc test/lib/python2.7/encodings/gb2312.py test/lib/python2.7/encodings/gb2312.pyc test/lib/python2.7/encodings/gbk.py test/lib/python2.7/encodings/gbk.pyc test/lib/python2.7/encodings/hex_codec.py test/lib/python2.7/encodings/hex_codec.pyc test/lib/python2.7/encodings/hp_roman8.py test/lib/python2.7/encodings/hp_roman8.pyc test/lib/python2.7/encodings/hz.py test/lib/python2.7/encodings/hz.pyc test/lib/python2.7/encodings/idna.py test/lib/python2.7/encodings/idna.pyc test/lib/python2.7/encodings/iso2022_jp.py test/lib/python2.7/encodings/iso2022_jp.pyc test/lib/python2.7/encodings/iso2022_jp_1.py test/lib/python2.7/encodings/iso2022_jp_1.pyc test/lib/python2.7/encodings/iso2022_jp_2.py test/lib/python2.7/encodings/iso2022_jp_2.pyc test/lib/python2.7/encodings/iso2022_jp_2004.py test/lib/python2.7/encodings/iso2022_jp_2004.pyc test/lib/python2.7/encodings/iso2022_jp_3.py test/lib/python2.7/encodings/iso2022_jp_3.pyc test/lib/python2.7/encodings/iso2022_jp_ext.py test/lib/python2.7/encodings/iso2022_jp_ext.pyc test/lib/python2.7/encodings/iso2022_kr.py test/lib/python2.7/encodings/iso2022_kr.pyc test/lib/python2.7/encodings/iso8859_1.py test/lib/python2.7/encodings/iso8859_1.pyc test/lib/python2.7/encodings/iso8859_10.py test/lib/python2.7/encodings/iso8859_10.pyc test/lib/python2.7/encodings/iso8859_11.py test/lib/python2.7/encodings/iso8859_11.pyc test/lib/python2.7/encodings/iso8859_13.py test/lib/python2.7/encodings/iso8859_13.pyc test/lib/python2.7/encodings/iso8859_14.py test/lib/python2.7/encodings/iso8859_14.pyc test/lib/python2.7/encodings/iso8859_15.py test/lib/python2.7/encodings/iso8859_15.pyc test/lib/python2.7/encodings/iso8859_16.py test/lib/python2.7/encodings/iso8859_16.pyc test/lib/python2.7/encodings/iso8859_2.py test/lib/python2.7/encodings/iso8859_2.pyc test/lib/python2.7/encodings/iso8859_3.py test/lib/python2.7/encodings/iso8859_3.pyc test/lib/python2.7/encodings/iso8859_4.py test/lib/python2.7/encodings/iso8859_4.pyc test/lib/python2.7/encodings/iso8859_5.py test/lib/python2.7/encodings/iso8859_5.pyc test/lib/python2.7/encodings/iso8859_6.py test/lib/python2.7/encodings/iso8859_6.pyc test/lib/python2.7/encodings/iso8859_7.py test/lib/python2.7/encodings/iso8859_7.pyc test/lib/python2.7/encodings/iso8859_8.py test/lib/python2.7/encodings/iso8859_8.pyc test/lib/python2.7/encodings/iso8859_9.py test/lib/python2.7/encodings/iso8859_9.pyc test/lib/python2.7/encodings/johab.py test/lib/python2.7/encodings/johab.pyc test/lib/python2.7/encodings/koi8_r.py test/lib/python2.7/encodings/koi8_r.pyc test/lib/python2.7/encodings/koi8_u.py test/lib/python2.7/encodings/koi8_u.pyc test/lib/python2.7/encodings/latin_1.py test/lib/python2.7/encodings/latin_1.pyc test/lib/python2.7/encodings/mac_arabic.py test/lib/python2.7/encodings/mac_arabic.pyc test/lib/python2.7/encodings/mac_centeuro.py test/lib/python2.7/encodings/mac_centeuro.pyc test/lib/python2.7/encodings/mac_croatian.py test/lib/python2.7/encodings/mac_croatian.pyc test/lib/python2.7/encodings/mac_cyrillic.py test/lib/python2.7/encodings/mac_cyrillic.pyc test/lib/python2.7/encodings/mac_farsi.py test/lib/python2.7/encodings/mac_farsi.pyc test/lib/python2.7/encodings/mac_greek.py test/lib/python2.7/encodings/mac_greek.pyc test/lib/python2.7/encodings/mac_iceland.py test/lib/python2.7/encodings/mac_iceland.pyc test/lib/python2.7/encodings/mac_latin2.py test/lib/python2.7/encodings/mac_latin2.pyc test/lib/python2.7/encodings/mac_roman.py test/lib/python2.7/encodings/mac_roman.pyc test/lib/python2.7/encodings/mac_romanian.py test/lib/python2.7/encodings/mac_romanian.pyc test/lib/python2.7/encodings/mac_turkish.py test/lib/python2.7/encodings/mac_turkish.pyc test/lib/python2.7/encodings/mbcs.py test/lib/python2.7/encodings/mbcs.pyc test/lib/python2.7/encodings/palmos.py test/lib/python2.7/encodings/palmos.pyc test/lib/python2.7/encodings/ptcp154.py test/lib/python2.7/encodings/ptcp154.pyc test/lib/python2.7/encodings/punycode.py test/lib/python2.7/encodings/punycode.pyc test/lib/python2.7/encodings/quopri_codec.py test/lib/python2.7/encodings/quopri_codec.pyc test/lib/python2.7/encodings/raw_unicode_escape.py test/lib/python2.7/encodings/raw_unicode_escape.pyc test/lib/python2.7/encodings/rot_13.py test/lib/python2.7/encodings/rot_13.pyc test/lib/python2.7/encodings/shift_jis.py test/lib/python2.7/encodings/shift_jis.pyc test/lib/python2.7/encodings/shift_jis_2004.py test/lib/python2.7/encodings/shift_jis_2004.pyc test/lib/python2.7/encodings/shift_jisx0213.py test/lib/python2.7/encodings/shift_jisx0213.pyc test/lib/python2.7/encodings/string_escape.py test/lib/python2.7/encodings/string_escape.pyc test/lib/python2.7/encodings/tis_620.py test/lib/python2.7/encodings/tis_620.pyc test/lib/python2.7/encodings/undefined.py test/lib/python2.7/encodings/undefined.pyc test/lib/python2.7/encodings/unicode_escape.py test/lib/python2.7/encodings/unicode_escape.pyc test/lib/python2.7/encodings/unicode_internal.py test/lib/python2.7/encodings/unicode_internal.pyc test/lib/python2.7/encodings/utf_16.py test/lib/python2.7/encodings/utf_16.pyc test/lib/python2.7/encodings/utf_16_be.py test/lib/python2.7/encodings/utf_16_be.pyc test/lib/python2.7/encodings/utf_16_le.py test/lib/python2.7/encodings/utf_16_le.pyc test/lib/python2.7/encodings/utf_32.py test/lib/python2.7/encodings/utf_32.pyc test/lib/python2.7/encodings/utf_32_be.py test/lib/python2.7/encodings/utf_32_be.pyc test/lib/python2.7/encodings/utf_32_le.py test/lib/python2.7/encodings/utf_32_le.pyc test/lib/python2.7/encodings/utf_7.py test/lib/python2.7/encodings/utf_7.pyc test/lib/python2.7/encodings/utf_8.py test/lib/python2.7/encodings/utf_8.pyc test/lib/python2.7/encodings/utf_8_sig.py test/lib/python2.7/encodings/utf_8_sig.pyc test/lib/python2.7/encodings/uu_codec.py test/lib/python2.7/encodings/uu_codec.pyc test/lib/python2.7/encodings/zlib_codec.py test/lib/python2.7/encodings/zlib_codec.pyc test/lib/python2.7/fnmatch.py test/lib/python2.7/fnmatch.pyc test/lib/python2.7/genericpath.py test/lib/python2.7/genericpath.pyc test/lib/python2.7/lib-dynload/Python-2.7.egg-info test/lib/python2.7/lib-dynload/_bsddb.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_codecs_cn.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_codecs_hk.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_codecs_iso2022.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_codecs_jp.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_codecs_kr.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_codecs_tw.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_csv.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_ctypes.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_ctypes_test.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_curses.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_curses_panel.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_elementtree.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_hashlib.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_hotshot.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_json.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_lsprof.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_multibytecodec.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_multiprocessing.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_sqlite3.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_ssl.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/_testcapi.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/audioop.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/bz2.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/crypt.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/dbm.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/fpectl.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/future_builtins.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/linuxaudiodev.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/mmap.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/nis.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/ossaudiodev.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/parser.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/pyexpat.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/readline.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/resource.x86_64-linux-gnu.so test/lib/python2.7/lib-dynload/termios.x86_64-linux-gnu.so test/lib/python2.7/linecache.py test/lib/python2.7/linecache.pyc test/lib/python2.7/locale.py test/lib/python2.7/locale.pyc test/lib/python2.7/no-global-site-packages.txt test/lib/python2.7/ntpath.py test/lib/python2.7/orig-prefix.txt test/lib/python2.7/os.py test/lib/python2.7/os.pyc test/lib/python2.7/posixpath.py test/lib/python2.7/posixpath.pyc test/lib/python2.7/re.py test/lib/python2.7/re.pyc test/lib/python2.7/site.py test/lib/python2.7/site.pyc test/lib/python2.7/sre.py test/lib/python2.7/sre_compile.py test/lib/python2.7/sre_compile.pyc test/lib/python2.7/sre_constants.py test/lib/python2.7/sre_constants.pyc test/lib/python2.7/sre_parse.py test/lib/python2.7/sre_parse.pyc test/lib/python2.7/stat.py test/lib/python2.7/stat.pyc test/lib/python2.7/types.py test/lib/python2.7/types.pyc test/lib/python2.7/warnings.py test/lib/python2.7/warnings.pyc | 
| diffstat | 417 files changed, 0 insertions(+), 53515 deletions(-) [+] | 
line wrap: on
 line diff
--- a/test/include/python2.7/Python-ast.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,535 +0,0 @@ -/* File automatically generated by Parser/asdl_c.py. */ - -#include "asdl.h" - -typedef struct _mod *mod_ty; - -typedef struct _stmt *stmt_ty; - -typedef struct _expr *expr_ty; - -typedef enum _expr_context { Load=1, Store=2, Del=3, AugLoad=4, AugStore=5, - Param=6 } expr_context_ty; - -typedef struct _slice *slice_ty; - -typedef enum _boolop { And=1, Or=2 } boolop_ty; - -typedef enum _operator { Add=1, Sub=2, Mult=3, Div=4, Mod=5, Pow=6, LShift=7, - RShift=8, BitOr=9, BitXor=10, BitAnd=11, FloorDiv=12 } - operator_ty; - -typedef enum _unaryop { Invert=1, Not=2, UAdd=3, USub=4 } unaryop_ty; - -typedef enum _cmpop { Eq=1, NotEq=2, Lt=3, LtE=4, Gt=5, GtE=6, Is=7, IsNot=8, - In=9, NotIn=10 } cmpop_ty; - -typedef struct _comprehension *comprehension_ty; - -typedef struct _excepthandler *excepthandler_ty; - -typedef struct _arguments *arguments_ty; - -typedef struct _keyword *keyword_ty; - -typedef struct _alias *alias_ty; - - -enum _mod_kind {Module_kind=1, Interactive_kind=2, Expression_kind=3, - Suite_kind=4}; -struct _mod { - enum _mod_kind kind; - union { - struct { - asdl_seq *body; - } Module; - - struct { - asdl_seq *body; - } Interactive; - - struct { - expr_ty body; - } Expression; - - struct { - asdl_seq *body; - } Suite; - - } v; -}; - -enum _stmt_kind {FunctionDef_kind=1, ClassDef_kind=2, Return_kind=3, - Delete_kind=4, Assign_kind=5, AugAssign_kind=6, Print_kind=7, - For_kind=8, While_kind=9, If_kind=10, With_kind=11, - Raise_kind=12, TryExcept_kind=13, TryFinally_kind=14, - Assert_kind=15, Import_kind=16, ImportFrom_kind=17, - Exec_kind=18, Global_kind=19, Expr_kind=20, Pass_kind=21, - Break_kind=22, Continue_kind=23}; -struct _stmt { - enum _stmt_kind kind; - union { - struct { - identifier name; - arguments_ty args; - asdl_seq *body; - asdl_seq *decorator_list; - } FunctionDef; - - struct { - identifier name; - asdl_seq *bases; - asdl_seq *body; - asdl_seq *decorator_list; - } ClassDef; - - struct { - expr_ty value; - } Return; - - struct { - asdl_seq *targets; - } Delete; - - struct { - asdl_seq *targets; - expr_ty value; - } Assign; - - struct { - expr_ty target; - operator_ty op; - expr_ty value; - } AugAssign; - - struct { - expr_ty dest; - asdl_seq *values; - bool nl; - } Print; - - struct { - expr_ty target; - expr_ty iter; - asdl_seq *body; - asdl_seq *orelse; - } For; - - struct { - expr_ty test; - asdl_seq *body; - asdl_seq *orelse; - } While; - - struct { - expr_ty test; - asdl_seq *body; - asdl_seq *orelse; - } If; - - struct { - expr_ty context_expr; - expr_ty optional_vars; - asdl_seq *body; - } With; - - struct { - expr_ty type; - expr_ty inst; - expr_ty tback; - } Raise; - - struct { - asdl_seq *body; - asdl_seq *handlers; - asdl_seq *orelse; - } TryExcept; - - struct { - asdl_seq *body; - asdl_seq *finalbody; - } TryFinally; - - struct { - expr_ty test; - expr_ty msg; - } Assert; - - struct { - asdl_seq *names; - } Import; - - struct { - identifier module; - asdl_seq *names; - int level; - } ImportFrom; - - struct { - expr_ty body; - expr_ty globals; - expr_ty locals; - } Exec; - - struct { - asdl_seq *names; - } Global; - - struct { - expr_ty value; - } Expr; - - } v; - int lineno; - int col_offset; -}; - -enum _expr_kind {BoolOp_kind=1, BinOp_kind=2, UnaryOp_kind=3, Lambda_kind=4, - IfExp_kind=5, Dict_kind=6, Set_kind=7, ListComp_kind=8, - SetComp_kind=9, DictComp_kind=10, GeneratorExp_kind=11, - Yield_kind=12, Compare_kind=13, Call_kind=14, Repr_kind=15, - Num_kind=16, Str_kind=17, Attribute_kind=18, - Subscript_kind=19, Name_kind=20, List_kind=21, Tuple_kind=22}; -struct _expr { - enum _expr_kind kind; - union { - struct { - boolop_ty op; - asdl_seq *values; - } BoolOp; - - struct { - expr_ty left; - operator_ty op; - expr_ty right; - } BinOp; - - struct { - unaryop_ty op; - expr_ty operand; - } UnaryOp; - - struct { - arguments_ty args; - expr_ty body; - } Lambda; - - struct { - expr_ty test; - expr_ty body; - expr_ty orelse; - } IfExp; - - struct { - asdl_seq *keys; - asdl_seq *values; - } Dict; - - struct { - asdl_seq *elts; - } Set; - - struct { - expr_ty elt; - asdl_seq *generators; - } ListComp; - - struct { - expr_ty elt; - asdl_seq *generators; - } SetComp; - - struct { - expr_ty key; - expr_ty value; - asdl_seq *generators; - } DictComp; - - struct { - expr_ty elt; - asdl_seq *generators; - } GeneratorExp; - - struct { - expr_ty value; - } Yield; - - struct { - expr_ty left; - asdl_int_seq *ops; - asdl_seq *comparators; - } Compare; - - struct { - expr_ty func; - asdl_seq *args; - asdl_seq *keywords; - expr_ty starargs; - expr_ty kwargs; - } Call; - - struct { - expr_ty value; - } Repr; - - struct { - object n; - } Num; - - struct { - string s; - } Str; - - struct { - expr_ty value; - identifier attr; - expr_context_ty ctx; - } Attribute; - - struct { - expr_ty value; - slice_ty slice; - expr_context_ty ctx; - } Subscript; - - struct { - identifier id; - expr_context_ty ctx; - } Name; - - struct { - asdl_seq *elts; - expr_context_ty ctx; - } List; - - struct { - asdl_seq *elts; - expr_context_ty ctx; - } Tuple; - - } v; - int lineno; - int col_offset; -}; - -enum _slice_kind {Ellipsis_kind=1, Slice_kind=2, ExtSlice_kind=3, Index_kind=4}; -struct _slice { - enum _slice_kind kind; - union { - struct { - expr_ty lower; - expr_ty upper; - expr_ty step; - } Slice; - - struct { - asdl_seq *dims; - } ExtSlice; - - struct { - expr_ty value; - } Index; - - } v; -}; - -struct _comprehension { - expr_ty target; - expr_ty iter; - asdl_seq *ifs; -}; - -enum _excepthandler_kind {ExceptHandler_kind=1}; -struct _excepthandler { - enum _excepthandler_kind kind; - union { - struct { - expr_ty type; - expr_ty name; - asdl_seq *body; - } ExceptHandler; - - } v; - int lineno; - int col_offset; -}; - -struct _arguments { - asdl_seq *args; - identifier vararg; - identifier kwarg; - asdl_seq *defaults; -}; - -struct _keyword { - identifier arg; - expr_ty value; -}; - -struct _alias { - identifier name; - identifier asname; -}; - - -#define Module(a0, a1) _Py_Module(a0, a1) -mod_ty _Py_Module(asdl_seq * body, PyArena *arena); -#define Interactive(a0, a1) _Py_Interactive(a0, a1) -mod_ty _Py_Interactive(asdl_seq * body, PyArena *arena); -#define Expression(a0, a1) _Py_Expression(a0, a1) -mod_ty _Py_Expression(expr_ty body, PyArena *arena); -#define Suite(a0, a1) _Py_Suite(a0, a1) -mod_ty _Py_Suite(asdl_seq * body, PyArena *arena); -#define FunctionDef(a0, a1, a2, a3, a4, a5, a6) _Py_FunctionDef(a0, a1, a2, a3, a4, a5, a6) -stmt_ty _Py_FunctionDef(identifier name, arguments_ty args, asdl_seq * body, - asdl_seq * decorator_list, int lineno, int col_offset, - PyArena *arena); -#define ClassDef(a0, a1, a2, a3, a4, a5, a6) _Py_ClassDef(a0, a1, a2, a3, a4, a5, a6) -stmt_ty _Py_ClassDef(identifier name, asdl_seq * bases, asdl_seq * body, - asdl_seq * decorator_list, int lineno, int col_offset, - PyArena *arena); -#define Return(a0, a1, a2, a3) _Py_Return(a0, a1, a2, a3) -stmt_ty _Py_Return(expr_ty value, int lineno, int col_offset, PyArena *arena); -#define Delete(a0, a1, a2, a3) _Py_Delete(a0, a1, a2, a3) -stmt_ty _Py_Delete(asdl_seq * targets, int lineno, int col_offset, PyArena - *arena); -#define Assign(a0, a1, a2, a3, a4) _Py_Assign(a0, a1, a2, a3, a4) -stmt_ty _Py_Assign(asdl_seq * targets, expr_ty value, int lineno, int - col_offset, PyArena *arena); -#define AugAssign(a0, a1, a2, a3, a4, a5) _Py_AugAssign(a0, a1, a2, a3, a4, a5) -stmt_ty _Py_AugAssign(expr_ty target, operator_ty op, expr_ty value, int - lineno, int col_offset, PyArena *arena); -#define Print(a0, a1, a2, a3, a4, a5) _Py_Print(a0, a1, a2, a3, a4, a5) -stmt_ty _Py_Print(expr_ty dest, asdl_seq * values, bool nl, int lineno, int - col_offset, PyArena *arena); -#define For(a0, a1, a2, a3, a4, a5, a6) _Py_For(a0, a1, a2, a3, a4, a5, a6) -stmt_ty _Py_For(expr_ty target, expr_ty iter, asdl_seq * body, asdl_seq * - orelse, int lineno, int col_offset, PyArena *arena); -#define While(a0, a1, a2, a3, a4, a5) _Py_While(a0, a1, a2, a3, a4, a5) -stmt_ty _Py_While(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno, - int col_offset, PyArena *arena); -#define If(a0, a1, a2, a3, a4, a5) _Py_If(a0, a1, a2, a3, a4, a5) -stmt_ty _Py_If(expr_ty test, asdl_seq * body, asdl_seq * orelse, int lineno, - int col_offset, PyArena *arena); -#define With(a0, a1, a2, a3, a4, a5) _Py_With(a0, a1, a2, a3, a4, a5) -stmt_ty _Py_With(expr_ty context_expr, expr_ty optional_vars, asdl_seq * body, - int lineno, int col_offset, PyArena *arena); -#define Raise(a0, a1, a2, a3, a4, a5) _Py_Raise(a0, a1, a2, a3, a4, a5) -stmt_ty _Py_Raise(expr_ty type, expr_ty inst, expr_ty tback, int lineno, int - col_offset, PyArena *arena); -#define TryExcept(a0, a1, a2, a3, a4, a5) _Py_TryExcept(a0, a1, a2, a3, a4, a5) -stmt_ty _Py_TryExcept(asdl_seq * body, asdl_seq * handlers, asdl_seq * orelse, - int lineno, int col_offset, PyArena *arena); -#define TryFinally(a0, a1, a2, a3, a4) _Py_TryFinally(a0, a1, a2, a3, a4) -stmt_ty _Py_TryFinally(asdl_seq * body, asdl_seq * finalbody, int lineno, int - col_offset, PyArena *arena); -#define Assert(a0, a1, a2, a3, a4) _Py_Assert(a0, a1, a2, a3, a4) -stmt_ty _Py_Assert(expr_ty test, expr_ty msg, int lineno, int col_offset, - PyArena *arena); -#define Import(a0, a1, a2, a3) _Py_Import(a0, a1, a2, a3) -stmt_ty _Py_Import(asdl_seq * names, int lineno, int col_offset, PyArena - *arena); -#define ImportFrom(a0, a1, a2, a3, a4, a5) _Py_ImportFrom(a0, a1, a2, a3, a4, a5) -stmt_ty _Py_ImportFrom(identifier module, asdl_seq * names, int level, int - lineno, int col_offset, PyArena *arena); -#define Exec(a0, a1, a2, a3, a4, a5) _Py_Exec(a0, a1, a2, a3, a4, a5) -stmt_ty _Py_Exec(expr_ty body, expr_ty globals, expr_ty locals, int lineno, int - col_offset, PyArena *arena); -#define Global(a0, a1, a2, a3) _Py_Global(a0, a1, a2, a3) -stmt_ty _Py_Global(asdl_seq * names, int lineno, int col_offset, PyArena - *arena); -#define Expr(a0, a1, a2, a3) _Py_Expr(a0, a1, a2, a3) -stmt_ty _Py_Expr(expr_ty value, int lineno, int col_offset, PyArena *arena); -#define Pass(a0, a1, a2) _Py_Pass(a0, a1, a2) -stmt_ty _Py_Pass(int lineno, int col_offset, PyArena *arena); -#define Break(a0, a1, a2) _Py_Break(a0, a1, a2) -stmt_ty _Py_Break(int lineno, int col_offset, PyArena *arena); -#define Continue(a0, a1, a2) _Py_Continue(a0, a1, a2) -stmt_ty _Py_Continue(int lineno, int col_offset, PyArena *arena); -#define BoolOp(a0, a1, a2, a3, a4) _Py_BoolOp(a0, a1, a2, a3, a4) -expr_ty _Py_BoolOp(boolop_ty op, asdl_seq * values, int lineno, int col_offset, - PyArena *arena); -#define BinOp(a0, a1, a2, a3, a4, a5) _Py_BinOp(a0, a1, a2, a3, a4, a5) -expr_ty _Py_BinOp(expr_ty left, operator_ty op, expr_ty right, int lineno, int - col_offset, PyArena *arena); -#define UnaryOp(a0, a1, a2, a3, a4) _Py_UnaryOp(a0, a1, a2, a3, a4) -expr_ty _Py_UnaryOp(unaryop_ty op, expr_ty operand, int lineno, int col_offset, - PyArena *arena); -#define Lambda(a0, a1, a2, a3, a4) _Py_Lambda(a0, a1, a2, a3, a4) -expr_ty _Py_Lambda(arguments_ty args, expr_ty body, int lineno, int col_offset, - PyArena *arena); -#define IfExp(a0, a1, a2, a3, a4, a5) _Py_IfExp(a0, a1, a2, a3, a4, a5) -expr_ty _Py_IfExp(expr_ty test, expr_ty body, expr_ty orelse, int lineno, int - col_offset, PyArena *arena); -#define Dict(a0, a1, a2, a3, a4) _Py_Dict(a0, a1, a2, a3, a4) -expr_ty _Py_Dict(asdl_seq * keys, asdl_seq * values, int lineno, int - col_offset, PyArena *arena); -#define Set(a0, a1, a2, a3) _Py_Set(a0, a1, a2, a3) -expr_ty _Py_Set(asdl_seq * elts, int lineno, int col_offset, PyArena *arena); -#define ListComp(a0, a1, a2, a3, a4) _Py_ListComp(a0, a1, a2, a3, a4) -expr_ty _Py_ListComp(expr_ty elt, asdl_seq * generators, int lineno, int - col_offset, PyArena *arena); -#define SetComp(a0, a1, a2, a3, a4) _Py_SetComp(a0, a1, a2, a3, a4) -expr_ty _Py_SetComp(expr_ty elt, asdl_seq * generators, int lineno, int - col_offset, PyArena *arena); -#define DictComp(a0, a1, a2, a3, a4, a5) _Py_DictComp(a0, a1, a2, a3, a4, a5) -expr_ty _Py_DictComp(expr_ty key, expr_ty value, asdl_seq * generators, int - lineno, int col_offset, PyArena *arena); -#define GeneratorExp(a0, a1, a2, a3, a4) _Py_GeneratorExp(a0, a1, a2, a3, a4) -expr_ty _Py_GeneratorExp(expr_ty elt, asdl_seq * generators, int lineno, int - col_offset, PyArena *arena); -#define Yield(a0, a1, a2, a3) _Py_Yield(a0, a1, a2, a3) -expr_ty _Py_Yield(expr_ty value, int lineno, int col_offset, PyArena *arena); -#define Compare(a0, a1, a2, a3, a4, a5) _Py_Compare(a0, a1, a2, a3, a4, a5) -expr_ty _Py_Compare(expr_ty left, asdl_int_seq * ops, asdl_seq * comparators, - int lineno, int col_offset, PyArena *arena); -#define Call(a0, a1, a2, a3, a4, a5, a6, a7) _Py_Call(a0, a1, a2, a3, a4, a5, a6, a7) -expr_ty _Py_Call(expr_ty func, asdl_seq * args, asdl_seq * keywords, expr_ty - starargs, expr_ty kwargs, int lineno, int col_offset, PyArena - *arena); -#define Repr(a0, a1, a2, a3) _Py_Repr(a0, a1, a2, a3) -expr_ty _Py_Repr(expr_ty value, int lineno, int col_offset, PyArena *arena); -#define Num(a0, a1, a2, a3) _Py_Num(a0, a1, a2, a3) -expr_ty _Py_Num(object n, int lineno, int col_offset, PyArena *arena); -#define Str(a0, a1, a2, a3) _Py_Str(a0, a1, a2, a3) -expr_ty _Py_Str(string s, int lineno, int col_offset, PyArena *arena); -#define Attribute(a0, a1, a2, a3, a4, a5) _Py_Attribute(a0, a1, a2, a3, a4, a5) -expr_ty _Py_Attribute(expr_ty value, identifier attr, expr_context_ty ctx, int - lineno, int col_offset, PyArena *arena); -#define Subscript(a0, a1, a2, a3, a4, a5) _Py_Subscript(a0, a1, a2, a3, a4, a5) -expr_ty _Py_Subscript(expr_ty value, slice_ty slice, expr_context_ty ctx, int - lineno, int col_offset, PyArena *arena); -#define Name(a0, a1, a2, a3, a4) _Py_Name(a0, a1, a2, a3, a4) -expr_ty _Py_Name(identifier id, expr_context_ty ctx, int lineno, int - col_offset, PyArena *arena); -#define List(a0, a1, a2, a3, a4) _Py_List(a0, a1, a2, a3, a4) -expr_ty _Py_List(asdl_seq * elts, expr_context_ty ctx, int lineno, int - col_offset, PyArena *arena); -#define Tuple(a0, a1, a2, a3, a4) _Py_Tuple(a0, a1, a2, a3, a4) -expr_ty _Py_Tuple(asdl_seq * elts, expr_context_ty ctx, int lineno, int - col_offset, PyArena *arena); -#define Ellipsis(a0) _Py_Ellipsis(a0) -slice_ty _Py_Ellipsis(PyArena *arena); -#define Slice(a0, a1, a2, a3) _Py_Slice(a0, a1, a2, a3) -slice_ty _Py_Slice(expr_ty lower, expr_ty upper, expr_ty step, PyArena *arena); -#define ExtSlice(a0, a1) _Py_ExtSlice(a0, a1) -slice_ty _Py_ExtSlice(asdl_seq * dims, PyArena *arena); -#define Index(a0, a1) _Py_Index(a0, a1) -slice_ty _Py_Index(expr_ty value, PyArena *arena); -#define comprehension(a0, a1, a2, a3) _Py_comprehension(a0, a1, a2, a3) -comprehension_ty _Py_comprehension(expr_ty target, expr_ty iter, asdl_seq * - ifs, PyArena *arena); -#define ExceptHandler(a0, a1, a2, a3, a4, a5) _Py_ExceptHandler(a0, a1, a2, a3, a4, a5) -excepthandler_ty _Py_ExceptHandler(expr_ty type, expr_ty name, asdl_seq * body, - int lineno, int col_offset, PyArena *arena); -#define arguments(a0, a1, a2, a3, a4) _Py_arguments(a0, a1, a2, a3, a4) -arguments_ty _Py_arguments(asdl_seq * args, identifier vararg, identifier - kwarg, asdl_seq * defaults, PyArena *arena); -#define keyword(a0, a1, a2) _Py_keyword(a0, a1, a2) -keyword_ty _Py_keyword(identifier arg, expr_ty value, PyArena *arena); -#define alias(a0, a1, a2) _Py_alias(a0, a1, a2) -alias_ty _Py_alias(identifier name, identifier asname, PyArena *arena); - -PyObject* PyAST_mod2obj(mod_ty t); -mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode); -int PyAST_Check(PyObject* obj);
--- a/test/include/python2.7/Python.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,178 +0,0 @@ -#ifndef Py_PYTHON_H -#define Py_PYTHON_H -/* Since this is a "meta-include" file, no #ifdef __cplusplus / extern "C" { */ - -/* Include nearly all Python header files */ - -#include "patchlevel.h" -#include "pyconfig.h" -#include "pymacconfig.h" - -/* Cyclic gc is always enabled, starting with release 2.3a1. Supply the - * old symbol for the benefit of extension modules written before then - * that may be conditionalizing on it. The core doesn't use it anymore. - */ -#ifndef WITH_CYCLE_GC -#define WITH_CYCLE_GC 1 -#endif - -#include <limits.h> - -#ifndef UCHAR_MAX -#error "Something's broken. UCHAR_MAX should be defined in limits.h." -#endif - -#if UCHAR_MAX != 255 -#error "Python's source code assumes C's unsigned char is an 8-bit type." -#endif - -#if defined(__sgi) && defined(WITH_THREAD) && !defined(_SGI_MP_SOURCE) -#define _SGI_MP_SOURCE -#endif - -#include <stdio.h> -#ifndef NULL -# error "Python.h requires that stdio.h define NULL." -#endif - -#include <string.h> -#ifdef HAVE_ERRNO_H -#include <errno.h> -#endif -#include <stdlib.h> -#ifdef HAVE_UNISTD_H -#include <unistd.h> -#endif - -/* For size_t? */ -#ifdef HAVE_STDDEF_H -#include <stddef.h> -#endif - -/* CAUTION: Build setups should ensure that NDEBUG is defined on the - * compiler command line when building Python in release mode; else - * assert() calls won't be removed. - */ -#include <assert.h> - -#include "pyport.h" - -/* pyconfig.h or pyport.h may or may not define DL_IMPORT */ -#ifndef DL_IMPORT /* declarations for DLL import/export */ -#define DL_IMPORT(RTYPE) RTYPE -#endif -#ifndef DL_EXPORT /* declarations for DLL import/export */ -#define DL_EXPORT(RTYPE) RTYPE -#endif - -/* Debug-mode build with pymalloc implies PYMALLOC_DEBUG. - * PYMALLOC_DEBUG is in error if pymalloc is not in use. - */ -#if defined(Py_DEBUG) && defined(WITH_PYMALLOC) && !defined(PYMALLOC_DEBUG) -#define PYMALLOC_DEBUG -#endif -#if defined(PYMALLOC_DEBUG) && !defined(WITH_PYMALLOC) -#error "PYMALLOC_DEBUG requires WITH_PYMALLOC" -#endif -#include "pymath.h" -#include "pymem.h" - -#include "object.h" -#include "objimpl.h" - -#include "pydebug.h" - -#include "unicodeobject.h" -#include "intobject.h" -#include "boolobject.h" -#include "longobject.h" -#include "floatobject.h" -#ifndef WITHOUT_COMPLEX -#include "complexobject.h" -#endif -#include "rangeobject.h" -#include "stringobject.h" -#include "memoryobject.h" -#include "bufferobject.h" -#include "bytesobject.h" -#include "bytearrayobject.h" -#include "tupleobject.h" -#include "listobject.h" -#include "dictobject.h" -#include "enumobject.h" -#include "setobject.h" -#include "methodobject.h" -#include "moduleobject.h" -#include "funcobject.h" -#include "classobject.h" -#include "fileobject.h" -#include "cobject.h" -#include "pycapsule.h" -#include "traceback.h" -#include "sliceobject.h" -#include "cellobject.h" -#include "iterobject.h" -#include "genobject.h" -#include "descrobject.h" -#include "warnings.h" -#include "weakrefobject.h" - -#include "codecs.h" -#include "pyerrors.h" - -#include "pystate.h" - -#include "pyarena.h" -#include "modsupport.h" -#include "pythonrun.h" -#include "ceval.h" -#include "sysmodule.h" -#include "intrcheck.h" -#include "import.h" - -#include "abstract.h" - -#include "compile.h" -#include "eval.h" - -#include "pyctype.h" -#include "pystrtod.h" -#include "pystrcmp.h" -#include "dtoa.h" - -/* _Py_Mangle is defined in compile.c */ -PyAPI_FUNC(PyObject*) _Py_Mangle(PyObject *p, PyObject *name); - -/* PyArg_GetInt is deprecated and should not be used, use PyArg_Parse(). */ -#define PyArg_GetInt(v, a) PyArg_Parse((v), "i", (a)) - -/* PyArg_NoArgs should not be necessary. - Set ml_flags in the PyMethodDef to METH_NOARGS. */ -#define PyArg_NoArgs(v) PyArg_Parse(v, "") - -/* Argument must be a char or an int in [-128, 127] or [0, 255]. */ -#define Py_CHARMASK(c) ((unsigned char)((c) & 0xff)) - -#include "pyfpe.h" - -/* These definitions must match corresponding definitions in graminit.h. - There's code in compile.c that checks that they are the same. */ -#define Py_single_input 256 -#define Py_file_input 257 -#define Py_eval_input 258 - -#ifdef HAVE_PTH -/* GNU pth user-space thread support */ -#include <pth.h> -#endif - -/* Define macros for inline documentation. */ -#define PyDoc_VAR(name) static char name[] -#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str) -#ifdef WITH_DOC_STRINGS -#define PyDoc_STR(str) str -#else -#define PyDoc_STR(str) "" -#endif - -#endif /* !Py_PYTHON_H */
--- a/test/include/python2.7/abstract.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1396 +0,0 @@ -#ifndef Py_ABSTRACTOBJECT_H -#define Py_ABSTRACTOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef PY_SSIZE_T_CLEAN -#define PyObject_CallFunction _PyObject_CallFunction_SizeT -#define PyObject_CallMethod _PyObject_CallMethod_SizeT -#endif - -/* Abstract Object Interface (many thanks to Jim Fulton) */ - -/* - PROPOSAL: A Generic Python Object Interface for Python C Modules - -Problem - - Python modules written in C that must access Python objects must do - so through routines whose interfaces are described by a set of - include files. Unfortunately, these routines vary according to the - object accessed. To use these routines, the C programmer must check - the type of the object being used and must call a routine based on - the object type. For example, to access an element of a sequence, - the programmer must determine whether the sequence is a list or a - tuple: - - if(is_tupleobject(o)) - e=gettupleitem(o,i) - else if(is_listitem(o)) - e=getlistitem(o,i) - - If the programmer wants to get an item from another type of object - that provides sequence behavior, there is no clear way to do it - correctly. - - The persistent programmer may peruse object.h and find that the - _typeobject structure provides a means of invoking up to (currently - about) 41 special operators. So, for example, a routine can get an - item from any object that provides sequence behavior. However, to - use this mechanism, the programmer must make their code dependent on - the current Python implementation. - - Also, certain semantics, especially memory management semantics, may - differ by the type of object being used. Unfortunately, these - semantics are not clearly described in the current include files. - An abstract interface providing more consistent semantics is needed. - -Proposal - - I propose the creation of a standard interface (with an associated - library of routines and/or macros) for generically obtaining the - services of Python objects. This proposal can be viewed as one - components of a Python C interface consisting of several components. - - From the viewpoint of C access to Python services, we have (as - suggested by Guido in off-line discussions): - - - "Very high level layer": two or three functions that let you exec or - eval arbitrary Python code given as a string in a module whose name is - given, passing C values in and getting C values out using - mkvalue/getargs style format strings. This does not require the user - to declare any variables of type "PyObject *". This should be enough - to write a simple application that gets Python code from the user, - execs it, and returns the output or errors. (Error handling must also - be part of this API.) - - - "Abstract objects layer": which is the subject of this proposal. - It has many functions operating on objects, and lest you do many - things from C that you can also write in Python, without going - through the Python parser. - - - "Concrete objects layer": This is the public type-dependent - interface provided by the standard built-in types, such as floats, - strings, and lists. This interface exists and is currently - documented by the collection of include files provided with the - Python distributions. - - From the point of view of Python accessing services provided by C - modules: - - - "Python module interface": this interface consist of the basic - routines used to define modules and their members. Most of the - current extensions-writing guide deals with this interface. - - - "Built-in object interface": this is the interface that a new - built-in type must provide and the mechanisms and rules that a - developer of a new built-in type must use and follow. - - This proposal is a "first-cut" that is intended to spur - discussion. See especially the lists of notes. - - The Python C object interface will provide four protocols: object, - numeric, sequence, and mapping. Each protocol consists of a - collection of related operations. If an operation that is not - provided by a particular type is invoked, then a standard exception, - NotImplementedError is raised with a operation name as an argument. - In addition, for convenience this interface defines a set of - constructors for building objects of built-in types. This is needed - so new objects can be returned from C functions that otherwise treat - objects generically. - -Memory Management - - For all of the functions described in this proposal, if a function - retains a reference to a Python object passed as an argument, then the - function will increase the reference count of the object. It is - unnecessary for the caller to increase the reference count of an - argument in anticipation of the object's retention. - - All Python objects returned from functions should be treated as new - objects. Functions that return objects assume that the caller will - retain a reference and the reference count of the object has already - been incremented to account for this fact. A caller that does not - retain a reference to an object that is returned from a function - must decrement the reference count of the object (using - DECREF(object)) to prevent memory leaks. - - Note that the behavior mentioned here is different from the current - behavior for some objects (e.g. lists and tuples) when certain - type-specific routines are called directly (e.g. setlistitem). The - proposed abstraction layer will provide a consistent memory - management interface, correcting for inconsistent behavior for some - built-in types. - -Protocols - -xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx*/ - -/* Object Protocol: */ - - /* Implemented elsewhere: - - int PyObject_Print(PyObject *o, FILE *fp, int flags); - - Print an object, o, on file, fp. Returns -1 on - error. The flags argument is used to enable certain printing - options. The only option currently supported is Py_Print_RAW. - - (What should be said about Py_Print_RAW?) - - */ - - /* Implemented elsewhere: - - int PyObject_HasAttrString(PyObject *o, char *attr_name); - - Returns 1 if o has the attribute attr_name, and 0 otherwise. - This is equivalent to the Python expression: - hasattr(o,attr_name). - - This function always succeeds. - - */ - - /* Implemented elsewhere: - - PyObject* PyObject_GetAttrString(PyObject *o, char *attr_name); - - Retrieve an attributed named attr_name form object o. - Returns the attribute value on success, or NULL on failure. - This is the equivalent of the Python expression: o.attr_name. - - */ - - /* Implemented elsewhere: - - int PyObject_HasAttr(PyObject *o, PyObject *attr_name); - - Returns 1 if o has the attribute attr_name, and 0 otherwise. - This is equivalent to the Python expression: - hasattr(o,attr_name). - - This function always succeeds. - - */ - - /* Implemented elsewhere: - - PyObject* PyObject_GetAttr(PyObject *o, PyObject *attr_name); - - Retrieve an attributed named attr_name form object o. - Returns the attribute value on success, or NULL on failure. - This is the equivalent of the Python expression: o.attr_name. - - */ - - - /* Implemented elsewhere: - - int PyObject_SetAttrString(PyObject *o, char *attr_name, PyObject *v); - - Set the value of the attribute named attr_name, for object o, - to the value, v. Returns -1 on failure. This is - the equivalent of the Python statement: o.attr_name=v. - - */ - - /* Implemented elsewhere: - - int PyObject_SetAttr(PyObject *o, PyObject *attr_name, PyObject *v); - - Set the value of the attribute named attr_name, for object o, - to the value, v. Returns -1 on failure. This is - the equivalent of the Python statement: o.attr_name=v. - - */ - - /* implemented as a macro: - - int PyObject_DelAttrString(PyObject *o, char *attr_name); - - Delete attribute named attr_name, for object o. Returns - -1 on failure. This is the equivalent of the Python - statement: del o.attr_name. - - */ -#define PyObject_DelAttrString(O,A) PyObject_SetAttrString((O),(A),NULL) - - /* implemented as a macro: - - int PyObject_DelAttr(PyObject *o, PyObject *attr_name); - - Delete attribute named attr_name, for object o. Returns -1 - on failure. This is the equivalent of the Python - statement: del o.attr_name. - - */ -#define PyObject_DelAttr(O,A) PyObject_SetAttr((O),(A),NULL) - - PyAPI_FUNC(int) PyObject_Cmp(PyObject *o1, PyObject *o2, int *result); - - /* - Compare the values of o1 and o2 using a routine provided by - o1, if one exists, otherwise with a routine provided by o2. - The result of the comparison is returned in result. Returns - -1 on failure. This is the equivalent of the Python - statement: result=cmp(o1,o2). - - */ - - /* Implemented elsewhere: - - int PyObject_Compare(PyObject *o1, PyObject *o2); - - Compare the values of o1 and o2 using a routine provided by - o1, if one exists, otherwise with a routine provided by o2. - Returns the result of the comparison on success. On error, - the value returned is undefined. This is equivalent to the - Python expression: cmp(o1,o2). - - */ - - /* Implemented elsewhere: - - PyObject *PyObject_Repr(PyObject *o); - - Compute the string representation of object, o. Returns the - string representation on success, NULL on failure. This is - the equivalent of the Python expression: repr(o). - - Called by the repr() built-in function and by reverse quotes. - - */ - - /* Implemented elsewhere: - - PyObject *PyObject_Str(PyObject *o); - - Compute the string representation of object, o. Returns the - string representation on success, NULL on failure. This is - the equivalent of the Python expression: str(o).) - - Called by the str() built-in function and by the print - statement. - - */ - - /* Implemented elsewhere: - - PyObject *PyObject_Unicode(PyObject *o); - - Compute the unicode representation of object, o. Returns the - unicode representation on success, NULL on failure. This is - the equivalent of the Python expression: unistr(o).) - - Called by the unistr() built-in function. - - */ - - /* Declared elsewhere - - PyAPI_FUNC(int) PyCallable_Check(PyObject *o); - - Determine if the object, o, is callable. Return 1 if the - object is callable and 0 otherwise. - - This function always succeeds. - - */ - - - - PyAPI_FUNC(PyObject *) PyObject_Call(PyObject *callable_object, - PyObject *args, PyObject *kw); - - /* - Call a callable Python object, callable_object, with - arguments and keywords arguments. The 'args' argument can not be - NULL, but the 'kw' argument can be NULL. - - */ - - PyAPI_FUNC(PyObject *) PyObject_CallObject(PyObject *callable_object, - PyObject *args); - - /* - Call a callable Python object, callable_object, with - arguments given by the tuple, args. If no arguments are - needed, then args may be NULL. Returns the result of the - call on success, or NULL on failure. This is the equivalent - of the Python expression: apply(o,args). - - */ - - PyAPI_FUNC(PyObject *) PyObject_CallFunction(PyObject *callable_object, - char *format, ...); - - /* - Call a callable Python object, callable_object, with a - variable number of C arguments. The C arguments are described - using a mkvalue-style format string. The format may be NULL, - indicating that no arguments are provided. Returns the - result of the call on success, or NULL on failure. This is - the equivalent of the Python expression: apply(o,args). - - */ - - - PyAPI_FUNC(PyObject *) PyObject_CallMethod(PyObject *o, char *m, - char *format, ...); - - /* - Call the method named m of object o with a variable number of - C arguments. The C arguments are described by a mkvalue - format string. The format may be NULL, indicating that no - arguments are provided. Returns the result of the call on - success, or NULL on failure. This is the equivalent of the - Python expression: o.method(args). - */ - - PyAPI_FUNC(PyObject *) _PyObject_CallFunction_SizeT(PyObject *callable, - char *format, ...); - PyAPI_FUNC(PyObject *) _PyObject_CallMethod_SizeT(PyObject *o, - char *name, - char *format, ...); - - PyAPI_FUNC(PyObject *) PyObject_CallFunctionObjArgs(PyObject *callable, - ...); - - /* - Call a callable Python object, callable_object, with a - variable number of C arguments. The C arguments are provided - as PyObject * values, terminated by a NULL. Returns the - result of the call on success, or NULL on failure. This is - the equivalent of the Python expression: apply(o,args). - */ - - - PyAPI_FUNC(PyObject *) PyObject_CallMethodObjArgs(PyObject *o, - PyObject *m, ...); - - /* - Call the method named m of object o with a variable number of - C arguments. The C arguments are provided as PyObject * - values, terminated by NULL. Returns the result of the call - on success, or NULL on failure. This is the equivalent of - the Python expression: o.method(args). - */ - - - /* Implemented elsewhere: - - long PyObject_Hash(PyObject *o); - - Compute and return the hash, hash_value, of an object, o. On - failure, return -1. This is the equivalent of the Python - expression: hash(o). - - */ - - - /* Implemented elsewhere: - - int PyObject_IsTrue(PyObject *o); - - Returns 1 if the object, o, is considered to be true, 0 if o is - considered to be false and -1 on failure. This is equivalent to the - Python expression: not not o - - */ - - /* Implemented elsewhere: - - int PyObject_Not(PyObject *o); - - Returns 0 if the object, o, is considered to be true, 1 if o is - considered to be false and -1 on failure. This is equivalent to the - Python expression: not o - - */ - - PyAPI_FUNC(PyObject *) PyObject_Type(PyObject *o); - - /* - On success, returns a type object corresponding to the object - type of object o. On failure, returns NULL. This is - equivalent to the Python expression: type(o). - */ - - PyAPI_FUNC(Py_ssize_t) PyObject_Size(PyObject *o); - - /* - Return the size of object o. If the object, o, provides - both sequence and mapping protocols, the sequence size is - returned. On error, -1 is returned. This is the equivalent - to the Python expression: len(o). - - */ - - /* For DLL compatibility */ -#undef PyObject_Length - PyAPI_FUNC(Py_ssize_t) PyObject_Length(PyObject *o); -#define PyObject_Length PyObject_Size - - PyAPI_FUNC(Py_ssize_t) _PyObject_LengthHint(PyObject *o, Py_ssize_t); - - /* - Guess the size of object o using len(o) or o.__length_hint__(). - If neither of those return a non-negative value, then return the - default value. If one of the calls fails, this function returns -1. - */ - - PyAPI_FUNC(PyObject *) PyObject_GetItem(PyObject *o, PyObject *key); - - /* - Return element of o corresponding to the object, key, or NULL - on failure. This is the equivalent of the Python expression: - o[key]. - - */ - - PyAPI_FUNC(int) PyObject_SetItem(PyObject *o, PyObject *key, PyObject *v); - - /* - Map the object, key, to the value, v. Returns - -1 on failure. This is the equivalent of the Python - statement: o[key]=v. - */ - - PyAPI_FUNC(int) PyObject_DelItemString(PyObject *o, char *key); - - /* - Remove the mapping for object, key, from the object *o. - Returns -1 on failure. This is equivalent to - the Python statement: del o[key]. - */ - - PyAPI_FUNC(int) PyObject_DelItem(PyObject *o, PyObject *key); - - /* - Delete the mapping for key from *o. Returns -1 on failure. - This is the equivalent of the Python statement: del o[key]. - */ - - PyAPI_FUNC(int) PyObject_AsCharBuffer(PyObject *obj, - const char **buffer, - Py_ssize_t *buffer_len); - - /* - Takes an arbitrary object which must support the (character, - single segment) buffer interface and returns a pointer to a - read-only memory location useable as character based input - for subsequent processing. - - 0 is returned on success. buffer and buffer_len are only - set in case no error occurs. Otherwise, -1 is returned and - an exception set. - - */ - - PyAPI_FUNC(int) PyObject_CheckReadBuffer(PyObject *obj); - - /* - Checks whether an arbitrary object supports the (character, - single segment) buffer interface. Returns 1 on success, 0 - on failure. - - */ - - PyAPI_FUNC(int) PyObject_AsReadBuffer(PyObject *obj, - const void **buffer, - Py_ssize_t *buffer_len); - - /* - Same as PyObject_AsCharBuffer() except that this API expects - (readable, single segment) buffer interface and returns a - pointer to a read-only memory location which can contain - arbitrary data. - - 0 is returned on success. buffer and buffer_len are only - set in case no error occurs. Otherwise, -1 is returned and - an exception set. - - */ - - PyAPI_FUNC(int) PyObject_AsWriteBuffer(PyObject *obj, - void **buffer, - Py_ssize_t *buffer_len); - - /* - Takes an arbitrary object which must support the (writeable, - single segment) buffer interface and returns a pointer to a - writeable memory location in buffer of size buffer_len. - - 0 is returned on success. buffer and buffer_len are only - set in case no error occurs. Otherwise, -1 is returned and - an exception set. - - */ - - /* new buffer API */ - -#define PyObject_CheckBuffer(obj) \ - (((obj)->ob_type->tp_as_buffer != NULL) && \ - (PyType_HasFeature((obj)->ob_type, Py_TPFLAGS_HAVE_NEWBUFFER)) && \ - ((obj)->ob_type->tp_as_buffer->bf_getbuffer != NULL)) - - /* Return 1 if the getbuffer function is available, otherwise - return 0 */ - - PyAPI_FUNC(int) PyObject_GetBuffer(PyObject *obj, Py_buffer *view, - int flags); - - /* This is a C-API version of the getbuffer function call. It checks - to make sure object has the required function pointer and issues the - call. Returns -1 and raises an error on failure and returns 0 on - success - */ - - - PyAPI_FUNC(void *) PyBuffer_GetPointer(Py_buffer *view, Py_ssize_t *indices); - - /* Get the memory area pointed to by the indices for the buffer given. - Note that view->ndim is the assumed size of indices - */ - - PyAPI_FUNC(int) PyBuffer_SizeFromFormat(const char *); - - /* Return the implied itemsize of the data-format area from a - struct-style description */ - - - - PyAPI_FUNC(int) PyBuffer_ToContiguous(void *buf, Py_buffer *view, - Py_ssize_t len, char fort); - - PyAPI_FUNC(int) PyBuffer_FromContiguous(Py_buffer *view, void *buf, - Py_ssize_t len, char fort); - - - /* Copy len bytes of data from the contiguous chunk of memory - pointed to by buf into the buffer exported by obj. Return - 0 on success and return -1 and raise a PyBuffer_Error on - error (i.e. the object does not have a buffer interface or - it is not working). - - If fort is 'F' and the object is multi-dimensional, - then the data will be copied into the array in - Fortran-style (first dimension varies the fastest). If - fort is 'C', then the data will be copied into the array - in C-style (last dimension varies the fastest). If fort - is 'A', then it does not matter and the copy will be made - in whatever way is more efficient. - - */ - - PyAPI_FUNC(int) PyObject_CopyData(PyObject *dest, PyObject *src); - - /* Copy the data from the src buffer to the buffer of destination - */ - - PyAPI_FUNC(int) PyBuffer_IsContiguous(Py_buffer *view, char fort); - - - PyAPI_FUNC(void) PyBuffer_FillContiguousStrides(int ndims, - Py_ssize_t *shape, - Py_ssize_t *strides, - int itemsize, - char fort); - - /* Fill the strides array with byte-strides of a contiguous - (Fortran-style if fort is 'F' or C-style otherwise) - array of the given shape with the given number of bytes - per element. - */ - - PyAPI_FUNC(int) PyBuffer_FillInfo(Py_buffer *view, PyObject *o, void *buf, - Py_ssize_t len, int readonly, - int flags); - - /* Fills in a buffer-info structure correctly for an exporter - that can only share a contiguous chunk of memory of - "unsigned bytes" of the given length. Returns 0 on success - and -1 (with raising an error) on error. - */ - - PyAPI_FUNC(void) PyBuffer_Release(Py_buffer *view); - - /* Releases a Py_buffer obtained from getbuffer ParseTuple's s*. - */ - - PyAPI_FUNC(PyObject *) PyObject_Format(PyObject* obj, - PyObject *format_spec); - /* - Takes an arbitrary object and returns the result of - calling obj.__format__(format_spec). - */ - -/* Iterators */ - - PyAPI_FUNC(PyObject *) PyObject_GetIter(PyObject *); - /* Takes an object and returns an iterator for it. - This is typically a new iterator but if the argument - is an iterator, this returns itself. */ - -#define PyIter_Check(obj) \ - (PyType_HasFeature((obj)->ob_type, Py_TPFLAGS_HAVE_ITER) && \ - (obj)->ob_type->tp_iternext != NULL && \ - (obj)->ob_type->tp_iternext != &_PyObject_NextNotImplemented) - - PyAPI_FUNC(PyObject *) PyIter_Next(PyObject *); - /* Takes an iterator object and calls its tp_iternext slot, - returning the next value. If the iterator is exhausted, - this returns NULL without setting an exception. - NULL with an exception means an error occurred. */ - -/* Number Protocol:*/ - - PyAPI_FUNC(int) PyNumber_Check(PyObject *o); - - /* - Returns 1 if the object, o, provides numeric protocols, and - false otherwise. - - This function always succeeds. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Add(PyObject *o1, PyObject *o2); - - /* - Returns the result of adding o1 and o2, or null on failure. - This is the equivalent of the Python expression: o1+o2. - - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Subtract(PyObject *o1, PyObject *o2); - - /* - Returns the result of subtracting o2 from o1, or null on - failure. This is the equivalent of the Python expression: - o1-o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Multiply(PyObject *o1, PyObject *o2); - - /* - Returns the result of multiplying o1 and o2, or null on - failure. This is the equivalent of the Python expression: - o1*o2. - - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Divide(PyObject *o1, PyObject *o2); - - /* - Returns the result of dividing o1 by o2, or null on failure. - This is the equivalent of the Python expression: o1/o2. - - - */ - - PyAPI_FUNC(PyObject *) PyNumber_FloorDivide(PyObject *o1, PyObject *o2); - - /* - Returns the result of dividing o1 by o2 giving an integral result, - or null on failure. - This is the equivalent of the Python expression: o1//o2. - - - */ - - PyAPI_FUNC(PyObject *) PyNumber_TrueDivide(PyObject *o1, PyObject *o2); - - /* - Returns the result of dividing o1 by o2 giving a float result, - or null on failure. - This is the equivalent of the Python expression: o1/o2. - - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Remainder(PyObject *o1, PyObject *o2); - - /* - Returns the remainder of dividing o1 by o2, or null on - failure. This is the equivalent of the Python expression: - o1%o2. - - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Divmod(PyObject *o1, PyObject *o2); - - /* - See the built-in function divmod. Returns NULL on failure. - This is the equivalent of the Python expression: - divmod(o1,o2). - - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Power(PyObject *o1, PyObject *o2, - PyObject *o3); - - /* - See the built-in function pow. Returns NULL on failure. - This is the equivalent of the Python expression: - pow(o1,o2,o3), where o3 is optional. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Negative(PyObject *o); - - /* - Returns the negation of o on success, or null on failure. - This is the equivalent of the Python expression: -o. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Positive(PyObject *o); - - /* - Returns the (what?) of o on success, or NULL on failure. - This is the equivalent of the Python expression: +o. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Absolute(PyObject *o); - - /* - Returns the absolute value of o, or null on failure. This is - the equivalent of the Python expression: abs(o). - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Invert(PyObject *o); - - /* - Returns the bitwise negation of o on success, or NULL on - failure. This is the equivalent of the Python expression: - ~o. - - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Lshift(PyObject *o1, PyObject *o2); - - /* - Returns the result of left shifting o1 by o2 on success, or - NULL on failure. This is the equivalent of the Python - expression: o1 << o2. - - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Rshift(PyObject *o1, PyObject *o2); - - /* - Returns the result of right shifting o1 by o2 on success, or - NULL on failure. This is the equivalent of the Python - expression: o1 >> o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_And(PyObject *o1, PyObject *o2); - - /* - Returns the result of bitwise and of o1 and o2 on success, or - NULL on failure. This is the equivalent of the Python - expression: o1&o2. - - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Xor(PyObject *o1, PyObject *o2); - - /* - Returns the bitwise exclusive or of o1 by o2 on success, or - NULL on failure. This is the equivalent of the Python - expression: o1^o2. - - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Or(PyObject *o1, PyObject *o2); - - /* - Returns the result of bitwise or on o1 and o2 on success, or - NULL on failure. This is the equivalent of the Python - expression: o1|o2. - - */ - - /* Implemented elsewhere: - - int PyNumber_Coerce(PyObject **p1, PyObject **p2); - - This function takes the addresses of two variables of type - PyObject*. - - If the objects pointed to by *p1 and *p2 have the same type, - increment their reference count and return 0 (success). - If the objects can be converted to a common numeric type, - replace *p1 and *p2 by their converted value (with 'new' - reference counts), and return 0. - If no conversion is possible, or if some other error occurs, - return -1 (failure) and don't increment the reference counts. - The call PyNumber_Coerce(&o1, &o2) is equivalent to the Python - statement o1, o2 = coerce(o1, o2). - - */ - -#define PyIndex_Check(obj) \ - ((obj)->ob_type->tp_as_number != NULL && \ - PyType_HasFeature((obj)->ob_type, Py_TPFLAGS_HAVE_INDEX) && \ - (obj)->ob_type->tp_as_number->nb_index != NULL) - - PyAPI_FUNC(PyObject *) PyNumber_Index(PyObject *o); - - /* - Returns the object converted to a Python long or int - or NULL with an error raised on failure. - */ - - PyAPI_FUNC(Py_ssize_t) PyNumber_AsSsize_t(PyObject *o, PyObject *exc); - - /* - Returns the Integral instance converted to an int. The - instance is expected to be int or long or have an __int__ - method. Steals integral's reference. error_format will be - used to create the TypeError if integral isn't actually an - Integral instance. error_format should be a format string - that can accept a char* naming integral's type. - */ - - PyAPI_FUNC(PyObject *) _PyNumber_ConvertIntegralToInt( - PyObject *integral, - const char* error_format); - - /* - Returns the object converted to Py_ssize_t by going through - PyNumber_Index first. If an overflow error occurs while - converting the int-or-long to Py_ssize_t, then the second argument - is the error-type to return. If it is NULL, then the overflow error - is cleared and the value is clipped. - */ - - PyAPI_FUNC(PyObject *) PyNumber_Int(PyObject *o); - - /* - Returns the o converted to an integer object on success, or - NULL on failure. This is the equivalent of the Python - expression: int(o). - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Long(PyObject *o); - - /* - Returns the o converted to a long integer object on success, - or NULL on failure. This is the equivalent of the Python - expression: long(o). - - */ - - PyAPI_FUNC(PyObject *) PyNumber_Float(PyObject *o); - - /* - Returns the o converted to a float object on success, or NULL - on failure. This is the equivalent of the Python expression: - float(o). - */ - -/* In-place variants of (some of) the above number protocol functions */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceAdd(PyObject *o1, PyObject *o2); - - /* - Returns the result of adding o2 to o1, possibly in-place, or null - on failure. This is the equivalent of the Python expression: - o1 += o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceSubtract(PyObject *o1, PyObject *o2); - - /* - Returns the result of subtracting o2 from o1, possibly in-place or - null on failure. This is the equivalent of the Python expression: - o1 -= o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceMultiply(PyObject *o1, PyObject *o2); - - /* - Returns the result of multiplying o1 by o2, possibly in-place, or - null on failure. This is the equivalent of the Python expression: - o1 *= o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceDivide(PyObject *o1, PyObject *o2); - - /* - Returns the result of dividing o1 by o2, possibly in-place, or null - on failure. This is the equivalent of the Python expression: - o1 /= o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceFloorDivide(PyObject *o1, - PyObject *o2); - - /* - Returns the result of dividing o1 by o2 giving an integral result, - possibly in-place, or null on failure. - This is the equivalent of the Python expression: - o1 /= o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceTrueDivide(PyObject *o1, - PyObject *o2); - - /* - Returns the result of dividing o1 by o2 giving a float result, - possibly in-place, or null on failure. - This is the equivalent of the Python expression: - o1 /= o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceRemainder(PyObject *o1, PyObject *o2); - - /* - Returns the remainder of dividing o1 by o2, possibly in-place, or - null on failure. This is the equivalent of the Python expression: - o1 %= o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlacePower(PyObject *o1, PyObject *o2, - PyObject *o3); - - /* - Returns the result of raising o1 to the power of o2, possibly - in-place, or null on failure. This is the equivalent of the Python - expression: o1 **= o2, or pow(o1, o2, o3) if o3 is present. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceLshift(PyObject *o1, PyObject *o2); - - /* - Returns the result of left shifting o1 by o2, possibly in-place, or - null on failure. This is the equivalent of the Python expression: - o1 <<= o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceRshift(PyObject *o1, PyObject *o2); - - /* - Returns the result of right shifting o1 by o2, possibly in-place or - null on failure. This is the equivalent of the Python expression: - o1 >>= o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceAnd(PyObject *o1, PyObject *o2); - - /* - Returns the result of bitwise and of o1 and o2, possibly in-place, - or null on failure. This is the equivalent of the Python - expression: o1 &= o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceXor(PyObject *o1, PyObject *o2); - - /* - Returns the bitwise exclusive or of o1 by o2, possibly in-place, or - null on failure. This is the equivalent of the Python expression: - o1 ^= o2. - - */ - - PyAPI_FUNC(PyObject *) PyNumber_InPlaceOr(PyObject *o1, PyObject *o2); - - /* - Returns the result of bitwise or of o1 and o2, possibly in-place, - or null on failure. This is the equivalent of the Python - expression: o1 |= o2. - - */ - - - PyAPI_FUNC(PyObject *) PyNumber_ToBase(PyObject *n, int base); - - /* - Returns the integer n converted to a string with a base, with a base - marker of 0b, 0o or 0x prefixed if applicable. - If n is not an int object, it is converted with PyNumber_Index first. - */ - - -/* Sequence protocol:*/ - - PyAPI_FUNC(int) PySequence_Check(PyObject *o); - - /* - Return 1 if the object provides sequence protocol, and zero - otherwise. - - This function always succeeds. - - */ - - PyAPI_FUNC(Py_ssize_t) PySequence_Size(PyObject *o); - - /* - Return the size of sequence object o, or -1 on failure. - - */ - - /* For DLL compatibility */ -#undef PySequence_Length - PyAPI_FUNC(Py_ssize_t) PySequence_Length(PyObject *o); -#define PySequence_Length PySequence_Size - - - PyAPI_FUNC(PyObject *) PySequence_Concat(PyObject *o1, PyObject *o2); - - /* - Return the concatenation of o1 and o2 on success, and NULL on - failure. This is the equivalent of the Python - expression: o1+o2. - - */ - - PyAPI_FUNC(PyObject *) PySequence_Repeat(PyObject *o, Py_ssize_t count); - - /* - Return the result of repeating sequence object o count times, - or NULL on failure. This is the equivalent of the Python - expression: o1*count. - - */ - - PyAPI_FUNC(PyObject *) PySequence_GetItem(PyObject *o, Py_ssize_t i); - - /* - Return the ith element of o, or NULL on failure. This is the - equivalent of the Python expression: o[i]. - */ - - PyAPI_FUNC(PyObject *) PySequence_GetSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2); - - /* - Return the slice of sequence object o between i1 and i2, or - NULL on failure. This is the equivalent of the Python - expression: o[i1:i2]. - - */ - - PyAPI_FUNC(int) PySequence_SetItem(PyObject *o, Py_ssize_t i, PyObject *v); - - /* - Assign object v to the ith element of o. Returns - -1 on failure. This is the equivalent of the Python - statement: o[i]=v. - - */ - - PyAPI_FUNC(int) PySequence_DelItem(PyObject *o, Py_ssize_t i); - - /* - Delete the ith element of object v. Returns - -1 on failure. This is the equivalent of the Python - statement: del o[i]. - */ - - PyAPI_FUNC(int) PySequence_SetSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2, - PyObject *v); - - /* - Assign the sequence object, v, to the slice in sequence - object, o, from i1 to i2. Returns -1 on failure. This is the - equivalent of the Python statement: o[i1:i2]=v. - */ - - PyAPI_FUNC(int) PySequence_DelSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2); - - /* - Delete the slice in sequence object, o, from i1 to i2. - Returns -1 on failure. This is the equivalent of the Python - statement: del o[i1:i2]. - */ - - PyAPI_FUNC(PyObject *) PySequence_Tuple(PyObject *o); - - /* - Returns the sequence, o, as a tuple on success, and NULL on failure. - This is equivalent to the Python expression: tuple(o) - */ - - - PyAPI_FUNC(PyObject *) PySequence_List(PyObject *o); - /* - Returns the sequence, o, as a list on success, and NULL on failure. - This is equivalent to the Python expression: list(o) - */ - - PyAPI_FUNC(PyObject *) PySequence_Fast(PyObject *o, const char* m); - /* - Return the sequence, o, as a list, unless it's already a - tuple or list. Use PySequence_Fast_GET_ITEM to access the - members of this list, and PySequence_Fast_GET_SIZE to get its length. - - Returns NULL on failure. If the object does not support iteration, - raises a TypeError exception with m as the message text. - */ - -#define PySequence_Fast_GET_SIZE(o) \ - (PyList_Check(o) ? PyList_GET_SIZE(o) : PyTuple_GET_SIZE(o)) - /* - Return the size of o, assuming that o was returned by - PySequence_Fast and is not NULL. - */ - -#define PySequence_Fast_GET_ITEM(o, i)\ - (PyList_Check(o) ? PyList_GET_ITEM(o, i) : PyTuple_GET_ITEM(o, i)) - /* - Return the ith element of o, assuming that o was returned by - PySequence_Fast, and that i is within bounds. - */ - -#define PySequence_ITEM(o, i)\ - ( Py_TYPE(o)->tp_as_sequence->sq_item(o, i) ) - /* Assume tp_as_sequence and sq_item exist and that i does not - need to be corrected for a negative index - */ - -#define PySequence_Fast_ITEMS(sf) \ - (PyList_Check(sf) ? ((PyListObject *)(sf))->ob_item \ - : ((PyTupleObject *)(sf))->ob_item) - /* Return a pointer to the underlying item array for - an object retured by PySequence_Fast */ - - PyAPI_FUNC(Py_ssize_t) PySequence_Count(PyObject *o, PyObject *value); - - /* - Return the number of occurrences on value on o, that is, - return the number of keys for which o[key]==value. On - failure, return -1. This is equivalent to the Python - expression: o.count(value). - */ - - PyAPI_FUNC(int) PySequence_Contains(PyObject *seq, PyObject *ob); - /* - Return -1 if error; 1 if ob in seq; 0 if ob not in seq. - Use __contains__ if possible, else _PySequence_IterSearch(). - */ - -#define PY_ITERSEARCH_COUNT 1 -#define PY_ITERSEARCH_INDEX 2 -#define PY_ITERSEARCH_CONTAINS 3 - PyAPI_FUNC(Py_ssize_t) _PySequence_IterSearch(PyObject *seq, - PyObject *obj, int operation); - /* - Iterate over seq. Result depends on the operation: - PY_ITERSEARCH_COUNT: return # of times obj appears in seq; -1 if - error. - PY_ITERSEARCH_INDEX: return 0-based index of first occurrence of - obj in seq; set ValueError and return -1 if none found; - also return -1 on error. - PY_ITERSEARCH_CONTAINS: return 1 if obj in seq, else 0; -1 on - error. - */ - -/* For DLL-level backwards compatibility */ -#undef PySequence_In - PyAPI_FUNC(int) PySequence_In(PyObject *o, PyObject *value); - -/* For source-level backwards compatibility */ -#define PySequence_In PySequence_Contains - - /* - Determine if o contains value. If an item in o is equal to - X, return 1, otherwise return 0. On error, return -1. This - is equivalent to the Python expression: value in o. - */ - - PyAPI_FUNC(Py_ssize_t) PySequence_Index(PyObject *o, PyObject *value); - - /* - Return the first index for which o[i]=value. On error, - return -1. This is equivalent to the Python - expression: o.index(value). - */ - -/* In-place versions of some of the above Sequence functions. */ - - PyAPI_FUNC(PyObject *) PySequence_InPlaceConcat(PyObject *o1, PyObject *o2); - - /* - Append o2 to o1, in-place when possible. Return the resulting - object, which could be o1, or NULL on failure. This is the - equivalent of the Python expression: o1 += o2. - - */ - - PyAPI_FUNC(PyObject *) PySequence_InPlaceRepeat(PyObject *o, Py_ssize_t count); - - /* - Repeat o1 by count, in-place when possible. Return the resulting - object, which could be o1, or NULL on failure. This is the - equivalent of the Python expression: o1 *= count. - - */ - -/* Mapping protocol:*/ - - PyAPI_FUNC(int) PyMapping_Check(PyObject *o); - - /* - Return 1 if the object provides mapping protocol, and zero - otherwise. - - This function always succeeds. - */ - - PyAPI_FUNC(Py_ssize_t) PyMapping_Size(PyObject *o); - - /* - Returns the number of keys in object o on success, and -1 on - failure. For objects that do not provide sequence protocol, - this is equivalent to the Python expression: len(o). - */ - - /* For DLL compatibility */ -#undef PyMapping_Length - PyAPI_FUNC(Py_ssize_t) PyMapping_Length(PyObject *o); -#define PyMapping_Length PyMapping_Size - - - /* implemented as a macro: - - int PyMapping_DelItemString(PyObject *o, char *key); - - Remove the mapping for object, key, from the object *o. - Returns -1 on failure. This is equivalent to - the Python statement: del o[key]. - */ -#define PyMapping_DelItemString(O,K) PyObject_DelItemString((O),(K)) - - /* implemented as a macro: - - int PyMapping_DelItem(PyObject *o, PyObject *key); - - Remove the mapping for object, key, from the object *o. - Returns -1 on failure. This is equivalent to - the Python statement: del o[key]. - */ -#define PyMapping_DelItem(O,K) PyObject_DelItem((O),(K)) - - PyAPI_FUNC(int) PyMapping_HasKeyString(PyObject *o, char *key); - - /* - On success, return 1 if the mapping object has the key, key, - and 0 otherwise. This is equivalent to the Python expression: - o.has_key(key). - - This function always succeeds. - */ - - PyAPI_FUNC(int) PyMapping_HasKey(PyObject *o, PyObject *key); - - /* - Return 1 if the mapping object has the key, key, - and 0 otherwise. This is equivalent to the Python expression: - o.has_key(key). - - This function always succeeds. - - */ - - /* Implemented as macro: - - PyObject *PyMapping_Keys(PyObject *o); - - On success, return a list of the keys in object o. On - failure, return NULL. This is equivalent to the Python - expression: o.keys(). - */ -#define PyMapping_Keys(O) PyObject_CallMethod(O,"keys",NULL) - - /* Implemented as macro: - - PyObject *PyMapping_Values(PyObject *o); - - On success, return a list of the values in object o. On - failure, return NULL. This is equivalent to the Python - expression: o.values(). - */ -#define PyMapping_Values(O) PyObject_CallMethod(O,"values",NULL) - - /* Implemented as macro: - - PyObject *PyMapping_Items(PyObject *o); - - On success, return a list of the items in object o, where - each item is a tuple containing a key-value pair. On - failure, return NULL. This is equivalent to the Python - expression: o.items(). - - */ -#define PyMapping_Items(O) PyObject_CallMethod(O,"items",NULL) - - PyAPI_FUNC(PyObject *) PyMapping_GetItemString(PyObject *o, char *key); - - /* - Return element of o corresponding to the object, key, or NULL - on failure. This is the equivalent of the Python expression: - o[key]. - */ - - PyAPI_FUNC(int) PyMapping_SetItemString(PyObject *o, char *key, - PyObject *value); - - /* - Map the object, key, to the value, v. Returns - -1 on failure. This is the equivalent of the Python - statement: o[key]=v. - */ - - -PyAPI_FUNC(int) PyObject_IsInstance(PyObject *object, PyObject *typeorclass); - /* isinstance(object, typeorclass) */ - -PyAPI_FUNC(int) PyObject_IsSubclass(PyObject *object, PyObject *typeorclass); - /* issubclass(object, typeorclass) */ - - -PyAPI_FUNC(int) _PyObject_RealIsInstance(PyObject *inst, PyObject *cls); - -PyAPI_FUNC(int) _PyObject_RealIsSubclass(PyObject *derived, PyObject *cls); - - -/* For internal use by buffer API functions */ -PyAPI_FUNC(void) _Py_add_one_to_index_F(int nd, Py_ssize_t *index, - const Py_ssize_t *shape); -PyAPI_FUNC(void) _Py_add_one_to_index_C(int nd, Py_ssize_t *index, - const Py_ssize_t *shape); - - -#ifdef __cplusplus -} -#endif -#endif /* Py_ABSTRACTOBJECT_H */
--- a/test/include/python2.7/asdl.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -#ifndef Py_ASDL_H -#define Py_ASDL_H - -typedef PyObject * identifier; -typedef PyObject * string; -typedef PyObject * object; - -#ifndef __cplusplus -typedef enum {false, true} bool; -#endif - -/* It would be nice if the code generated by asdl_c.py was completely - independent of Python, but it is a goal the requires too much work - at this stage. So, for example, I'll represent identifiers as - interned Python strings. -*/ - -/* XXX A sequence should be typed so that its use can be typechecked. */ - -typedef struct { - int size; - void *elements[1]; -} asdl_seq; - -typedef struct { - int size; - int elements[1]; -} asdl_int_seq; - -asdl_seq *asdl_seq_new(int size, PyArena *arena); -asdl_int_seq *asdl_int_seq_new(int size, PyArena *arena); - -#define asdl_seq_GET(S, I) (S)->elements[(I)] -#define asdl_seq_LEN(S) ((S) == NULL ? 0 : (S)->size) -#ifdef Py_DEBUG -#define asdl_seq_SET(S, I, V) { \ - int _asdl_i = (I); \ - assert((S) && _asdl_i < (S)->size); \ - (S)->elements[_asdl_i] = (V); \ -} -#else -#define asdl_seq_SET(S, I, V) (S)->elements[I] = (V) -#endif - -#endif /* !Py_ASDL_H */
--- a/test/include/python2.7/ast.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -#ifndef Py_AST_H -#define Py_AST_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_FUNC(mod_ty) PyAST_FromNode(const node *, PyCompilerFlags *flags, - const char *, PyArena *); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_AST_H */
--- a/test/include/python2.7/bitset.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ - -#ifndef Py_BITSET_H -#define Py_BITSET_H -#ifdef __cplusplus -extern "C" { -#endif - -/* Bitset interface */ - -#define BYTE char - -typedef BYTE *bitset; - -bitset newbitset(int nbits); -void delbitset(bitset bs); -#define testbit(ss, ibit) (((ss)[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0) -int addbit(bitset bs, int ibit); /* Returns 0 if already set */ -int samebitset(bitset bs1, bitset bs2, int nbits); -void mergebitset(bitset bs1, bitset bs2, int nbits); - -#define BITSPERBYTE (8*sizeof(BYTE)) -#define NBYTES(nbits) (((nbits) + BITSPERBYTE - 1) / BITSPERBYTE) - -#define BIT2BYTE(ibit) ((ibit) / BITSPERBYTE) -#define BIT2SHIFT(ibit) ((ibit) % BITSPERBYTE) -#define BIT2MASK(ibit) (1 << BIT2SHIFT(ibit)) -#define BYTE2BIT(ibyte) ((ibyte) * BITSPERBYTE) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_BITSET_H */
--- a/test/include/python2.7/boolobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -/* Boolean object interface */ - -#ifndef Py_BOOLOBJECT_H -#define Py_BOOLOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - - -typedef PyIntObject PyBoolObject; - -PyAPI_DATA(PyTypeObject) PyBool_Type; - -#define PyBool_Check(x) (Py_TYPE(x) == &PyBool_Type) - -/* Py_False and Py_True are the only two bools in existence. -Don't forget to apply Py_INCREF() when returning either!!! */ - -/* Don't use these directly */ -PyAPI_DATA(PyIntObject) _Py_ZeroStruct, _Py_TrueStruct; - -/* Use these macros */ -#define Py_False ((PyObject *) &_Py_ZeroStruct) -#define Py_True ((PyObject *) &_Py_TrueStruct) - -/* Macros for returning Py_True or Py_False, respectively */ -#define Py_RETURN_TRUE return Py_INCREF(Py_True), Py_True -#define Py_RETURN_FALSE return Py_INCREF(Py_False), Py_False - -/* Function to return a bool from a C long */ -PyAPI_FUNC(PyObject *) PyBool_FromLong(long); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_BOOLOBJECT_H */
--- a/test/include/python2.7/bufferobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ - -/* Buffer object interface */ - -/* Note: the object's structure is private */ - -#ifndef Py_BUFFEROBJECT_H -#define Py_BUFFEROBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - - -PyAPI_DATA(PyTypeObject) PyBuffer_Type; - -#define PyBuffer_Check(op) (Py_TYPE(op) == &PyBuffer_Type) - -#define Py_END_OF_BUFFER (-1) - -PyAPI_FUNC(PyObject *) PyBuffer_FromObject(PyObject *base, - Py_ssize_t offset, Py_ssize_t size); -PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteObject(PyObject *base, - Py_ssize_t offset, - Py_ssize_t size); - -PyAPI_FUNC(PyObject *) PyBuffer_FromMemory(void *ptr, Py_ssize_t size); -PyAPI_FUNC(PyObject *) PyBuffer_FromReadWriteMemory(void *ptr, Py_ssize_t size); - -PyAPI_FUNC(PyObject *) PyBuffer_New(Py_ssize_t size); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_BUFFEROBJECT_H */
--- a/test/include/python2.7/bytearrayobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ -/* ByteArray object interface */ - -#ifndef Py_BYTEARRAYOBJECT_H -#define Py_BYTEARRAYOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -#include <stdarg.h> - -/* Type PyByteArrayObject represents a mutable array of bytes. - * The Python API is that of a sequence; - * the bytes are mapped to ints in [0, 256). - * Bytes are not characters; they may be used to encode characters. - * The only way to go between bytes and str/unicode is via encoding - * and decoding. - * For the convenience of C programmers, the bytes type is considered - * to contain a char pointer, not an unsigned char pointer. - */ - -/* Object layout */ -typedef struct { - PyObject_VAR_HEAD - /* XXX(nnorwitz): should ob_exports be Py_ssize_t? */ - int ob_exports; /* how many buffer exports */ - Py_ssize_t ob_alloc; /* How many bytes allocated */ - char *ob_bytes; -} PyByteArrayObject; - -/* Type object */ -PyAPI_DATA(PyTypeObject) PyByteArray_Type; -PyAPI_DATA(PyTypeObject) PyByteArrayIter_Type; - -/* Type check macros */ -#define PyByteArray_Check(self) PyObject_TypeCheck(self, &PyByteArray_Type) -#define PyByteArray_CheckExact(self) (Py_TYPE(self) == &PyByteArray_Type) - -/* Direct API functions */ -PyAPI_FUNC(PyObject *) PyByteArray_FromObject(PyObject *); -PyAPI_FUNC(PyObject *) PyByteArray_Concat(PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) PyByteArray_FromStringAndSize(const char *, Py_ssize_t); -PyAPI_FUNC(Py_ssize_t) PyByteArray_Size(PyObject *); -PyAPI_FUNC(char *) PyByteArray_AsString(PyObject *); -PyAPI_FUNC(int) PyByteArray_Resize(PyObject *, Py_ssize_t); - -/* Macros, trading safety for speed */ -#define PyByteArray_AS_STRING(self) \ - (assert(PyByteArray_Check(self)), \ - Py_SIZE(self) ? ((PyByteArrayObject *)(self))->ob_bytes : _PyByteArray_empty_string) -#define PyByteArray_GET_SIZE(self) (assert(PyByteArray_Check(self)),Py_SIZE(self)) - -PyAPI_DATA(char) _PyByteArray_empty_string[]; - -#ifdef __cplusplus -} -#endif -#endif /* !Py_BYTEARRAYOBJECT_H */
--- a/test/include/python2.7/bytes_methods.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,75 +0,0 @@ -#ifndef Py_BYTES_CTYPE_H -#define Py_BYTES_CTYPE_H - -/* - * The internal implementation behind PyString (bytes) and PyBytes (buffer) - * methods of the given names, they operate on ASCII byte strings. - */ -extern PyObject* _Py_bytes_isspace(const char *cptr, Py_ssize_t len); -extern PyObject* _Py_bytes_isalpha(const char *cptr, Py_ssize_t len); -extern PyObject* _Py_bytes_isalnum(const char *cptr, Py_ssize_t len); -extern PyObject* _Py_bytes_isdigit(const char *cptr, Py_ssize_t len); -extern PyObject* _Py_bytes_islower(const char *cptr, Py_ssize_t len); -extern PyObject* _Py_bytes_isupper(const char *cptr, Py_ssize_t len); -extern PyObject* _Py_bytes_istitle(const char *cptr, Py_ssize_t len); - -/* These store their len sized answer in the given preallocated *result arg. */ -extern void _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len); -extern void _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len); -extern void _Py_bytes_title(char *result, char *s, Py_ssize_t len); -extern void _Py_bytes_capitalize(char *result, char *s, Py_ssize_t len); -extern void _Py_bytes_swapcase(char *result, char *s, Py_ssize_t len); - -/* Shared __doc__ strings. */ -extern const char _Py_isspace__doc__[]; -extern const char _Py_isalpha__doc__[]; -extern const char _Py_isalnum__doc__[]; -extern const char _Py_isdigit__doc__[]; -extern const char _Py_islower__doc__[]; -extern const char _Py_isupper__doc__[]; -extern const char _Py_istitle__doc__[]; -extern const char _Py_lower__doc__[]; -extern const char _Py_upper__doc__[]; -extern const char _Py_title__doc__[]; -extern const char _Py_capitalize__doc__[]; -extern const char _Py_swapcase__doc__[]; - -/* These are left in for backward compatibility and will be removed - in 2.8/3.2 */ -#define ISLOWER(c) Py_ISLOWER(c) -#define ISUPPER(c) Py_ISUPPER(c) -#define ISALPHA(c) Py_ISALPHA(c) -#define ISDIGIT(c) Py_ISDIGIT(c) -#define ISXDIGIT(c) Py_ISXDIGIT(c) -#define ISALNUM(c) Py_ISALNUM(c) -#define ISSPACE(c) Py_ISSPACE(c) - -#undef islower -#define islower(c) undefined_islower(c) -#undef isupper -#define isupper(c) undefined_isupper(c) -#undef isalpha -#define isalpha(c) undefined_isalpha(c) -#undef isdigit -#define isdigit(c) undefined_isdigit(c) -#undef isxdigit -#define isxdigit(c) undefined_isxdigit(c) -#undef isalnum -#define isalnum(c) undefined_isalnum(c) -#undef isspace -#define isspace(c) undefined_isspace(c) - -/* These are left in for backward compatibility and will be removed - in 2.8/3.2 */ -#define TOLOWER(c) Py_TOLOWER(c) -#define TOUPPER(c) Py_TOUPPER(c) - -#undef tolower -#define tolower(c) undefined_tolower(c) -#undef toupper -#define toupper(c) undefined_toupper(c) - -/* this is needed because some docs are shared from the .o, not static */ -#define PyDoc_STRVAR_shared(name,str) const char name[] = PyDoc_STR(str) - -#endif /* !Py_BYTES_CTYPE_H */
--- a/test/include/python2.7/bytesobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ -#define PyBytesObject PyStringObject -#define PyBytes_Type PyString_Type - -#define PyBytes_Check PyString_Check -#define PyBytes_CheckExact PyString_CheckExact -#define PyBytes_CHECK_INTERNED PyString_CHECK_INTERNED -#define PyBytes_AS_STRING PyString_AS_STRING -#define PyBytes_GET_SIZE PyString_GET_SIZE -#define Py_TPFLAGS_BYTES_SUBCLASS Py_TPFLAGS_STRING_SUBCLASS - -#define PyBytes_FromStringAndSize PyString_FromStringAndSize -#define PyBytes_FromString PyString_FromString -#define PyBytes_FromFormatV PyString_FromFormatV -#define PyBytes_FromFormat PyString_FromFormat -#define PyBytes_Size PyString_Size -#define PyBytes_AsString PyString_AsString -#define PyBytes_Repr PyString_Repr -#define PyBytes_Concat PyString_Concat -#define PyBytes_ConcatAndDel PyString_ConcatAndDel -#define _PyBytes_Resize _PyString_Resize -#define _PyBytes_Eq _PyString_Eq -#define PyBytes_Format PyString_Format -#define _PyBytes_FormatLong _PyString_FormatLong -#define PyBytes_DecodeEscape PyString_DecodeEscape -#define _PyBytes_Join _PyString_Join -#define PyBytes_AsStringAndSize PyString_AsStringAndSize -#define _PyBytes_InsertThousandsGrouping _PyString_InsertThousandsGrouping
--- a/test/include/python2.7/cStringIO.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,73 +0,0 @@ -#ifndef Py_CSTRINGIO_H -#define Py_CSTRINGIO_H -#ifdef __cplusplus -extern "C" { -#endif -/* - - This header provides access to cStringIO objects from C. - Functions are provided for calling cStringIO objects and - macros are provided for testing whether you have cStringIO - objects. - - Before calling any of the functions or macros, you must initialize - the routines with: - - PycString_IMPORT - - This would typically be done in your init function. - -*/ - -#define PycStringIO_CAPSULE_NAME "cStringIO.cStringIO_CAPI" - -#define PycString_IMPORT \ - PycStringIO = ((struct PycStringIO_CAPI*)PyCapsule_Import(\ - PycStringIO_CAPSULE_NAME, 0)) - -/* Basic functions to manipulate cStringIO objects from C */ - -static struct PycStringIO_CAPI { - - /* Read a string from an input object. If the last argument - is -1, the remainder will be read. - */ - int(*cread)(PyObject *, char **, Py_ssize_t); - - /* Read a line from an input object. Returns the length of the read - line as an int and a pointer inside the object buffer as char** (so - the caller doesn't have to provide its own buffer as destination). - */ - int(*creadline)(PyObject *, char **); - - /* Write a string to an output object*/ - int(*cwrite)(PyObject *, const char *, Py_ssize_t); - - /* Get the output object as a Python string (returns new reference). */ - PyObject *(*cgetvalue)(PyObject *); - - /* Create a new output object */ - PyObject *(*NewOutput)(int); - - /* Create an input object from a Python string - (copies the Python string reference). - */ - PyObject *(*NewInput)(PyObject *); - - /* The Python types for cStringIO input and output objects. - Note that you can do input on an output object. - */ - PyTypeObject *InputType, *OutputType; - -} *PycStringIO; - -/* These can be used to test if you have one */ -#define PycStringIO_InputCheck(O) \ - (Py_TYPE(O)==PycStringIO->InputType) -#define PycStringIO_OutputCheck(O) \ - (Py_TYPE(O)==PycStringIO->OutputType) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_CSTRINGIO_H */
--- a/test/include/python2.7/cellobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ -/* Cell object interface */ - -#ifndef Py_CELLOBJECT_H -#define Py_CELLOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - PyObject_HEAD - PyObject *ob_ref; /* Content of the cell or NULL when empty */ -} PyCellObject; - -PyAPI_DATA(PyTypeObject) PyCell_Type; - -#define PyCell_Check(op) (Py_TYPE(op) == &PyCell_Type) - -PyAPI_FUNC(PyObject *) PyCell_New(PyObject *); -PyAPI_FUNC(PyObject *) PyCell_Get(PyObject *); -PyAPI_FUNC(int) PyCell_Set(PyObject *, PyObject *); - -#define PyCell_GET(op) (((PyCellObject *)(op))->ob_ref) -#define PyCell_SET(op, v) (((PyCellObject *)(op))->ob_ref = v) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_TUPLEOBJECT_H */
--- a/test/include/python2.7/ceval.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,153 +0,0 @@ -#ifndef Py_CEVAL_H -#define Py_CEVAL_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* Interface to random parts in ceval.c */ - -PyAPI_FUNC(PyObject *) PyEval_CallObjectWithKeywords( - PyObject *, PyObject *, PyObject *); - -/* Inline this */ -#define PyEval_CallObject(func,arg) \ - PyEval_CallObjectWithKeywords(func, arg, (PyObject *)NULL) - -PyAPI_FUNC(PyObject *) PyEval_CallFunction(PyObject *obj, - const char *format, ...); -PyAPI_FUNC(PyObject *) PyEval_CallMethod(PyObject *obj, - const char *methodname, - const char *format, ...); - -PyAPI_FUNC(void) PyEval_SetProfile(Py_tracefunc, PyObject *); -PyAPI_FUNC(void) PyEval_SetTrace(Py_tracefunc, PyObject *); - -struct _frame; /* Avoid including frameobject.h */ - -PyAPI_FUNC(PyObject *) PyEval_GetBuiltins(void); -PyAPI_FUNC(PyObject *) PyEval_GetGlobals(void); -PyAPI_FUNC(PyObject *) PyEval_GetLocals(void); -PyAPI_FUNC(struct _frame *) PyEval_GetFrame(void); -PyAPI_FUNC(int) PyEval_GetRestricted(void); - -/* Look at the current frame's (if any) code's co_flags, and turn on - the corresponding compiler flags in cf->cf_flags. Return 1 if any - flag was set, else return 0. */ -PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf); - -PyAPI_FUNC(int) Py_FlushLine(void); - -PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg); -PyAPI_FUNC(int) Py_MakePendingCalls(void); - -/* Protection against deeply nested recursive calls */ -PyAPI_FUNC(void) Py_SetRecursionLimit(int); -PyAPI_FUNC(int) Py_GetRecursionLimit(void); - -#define Py_EnterRecursiveCall(where) \ - (_Py_MakeRecCheck(PyThreadState_GET()->recursion_depth) && \ - _Py_CheckRecursiveCall(where)) -#define Py_LeaveRecursiveCall() \ - (--PyThreadState_GET()->recursion_depth) -PyAPI_FUNC(int) _Py_CheckRecursiveCall(const char *where); -PyAPI_DATA(int) _Py_CheckRecursionLimit; -#ifdef USE_STACKCHECK -# define _Py_MakeRecCheck(x) (++(x) > --_Py_CheckRecursionLimit) -#else -# define _Py_MakeRecCheck(x) (++(x) > _Py_CheckRecursionLimit) -#endif - -PyAPI_FUNC(const char *) PyEval_GetFuncName(PyObject *); -PyAPI_FUNC(const char *) PyEval_GetFuncDesc(PyObject *); - -PyAPI_FUNC(PyObject *) PyEval_GetCallStats(PyObject *); -PyAPI_FUNC(PyObject *) PyEval_EvalFrame(struct _frame *); -PyAPI_FUNC(PyObject *) PyEval_EvalFrameEx(struct _frame *f, int exc); - -/* this used to be handled on a per-thread basis - now just two globals */ -PyAPI_DATA(volatile int) _Py_Ticker; -PyAPI_DATA(int) _Py_CheckInterval; - -/* Interface for threads. - - A module that plans to do a blocking system call (or something else - that lasts a long time and doesn't touch Python data) can allow other - threads to run as follows: - - ...preparations here... - Py_BEGIN_ALLOW_THREADS - ...blocking system call here... - Py_END_ALLOW_THREADS - ...interpret result here... - - The Py_BEGIN_ALLOW_THREADS/Py_END_ALLOW_THREADS pair expands to a - {}-surrounded block. - To leave the block in the middle (e.g., with return), you must insert - a line containing Py_BLOCK_THREADS before the return, e.g. - - if (...premature_exit...) { - Py_BLOCK_THREADS - PyErr_SetFromErrno(PyExc_IOError); - return NULL; - } - - An alternative is: - - Py_BLOCK_THREADS - if (...premature_exit...) { - PyErr_SetFromErrno(PyExc_IOError); - return NULL; - } - Py_UNBLOCK_THREADS - - For convenience, that the value of 'errno' is restored across - Py_END_ALLOW_THREADS and Py_BLOCK_THREADS. - - WARNING: NEVER NEST CALLS TO Py_BEGIN_ALLOW_THREADS AND - Py_END_ALLOW_THREADS!!! - - The function PyEval_InitThreads() should be called only from - initthread() in "threadmodule.c". - - Note that not yet all candidates have been converted to use this - mechanism! -*/ - -PyAPI_FUNC(PyThreadState *) PyEval_SaveThread(void); -PyAPI_FUNC(void) PyEval_RestoreThread(PyThreadState *); - -#ifdef WITH_THREAD - -PyAPI_FUNC(int) PyEval_ThreadsInitialized(void); -PyAPI_FUNC(void) PyEval_InitThreads(void); -PyAPI_FUNC(void) PyEval_AcquireLock(void); -PyAPI_FUNC(void) PyEval_ReleaseLock(void); -PyAPI_FUNC(void) PyEval_AcquireThread(PyThreadState *tstate); -PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); -PyAPI_FUNC(void) PyEval_ReInitThreads(void); - -#define Py_BEGIN_ALLOW_THREADS { \ - PyThreadState *_save; \ - _save = PyEval_SaveThread(); -#define Py_BLOCK_THREADS PyEval_RestoreThread(_save); -#define Py_UNBLOCK_THREADS _save = PyEval_SaveThread(); -#define Py_END_ALLOW_THREADS PyEval_RestoreThread(_save); \ - } - -#else /* !WITH_THREAD */ - -#define Py_BEGIN_ALLOW_THREADS { -#define Py_BLOCK_THREADS -#define Py_UNBLOCK_THREADS -#define Py_END_ALLOW_THREADS } - -#endif /* !WITH_THREAD */ - -PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *); - - -#ifdef __cplusplus -} -#endif -#endif /* !Py_CEVAL_H */
--- a/test/include/python2.7/classobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,83 +0,0 @@ - -/* Class object interface */ - -/* Revealing some structures (not for general use) */ - -#ifndef Py_CLASSOBJECT_H -#define Py_CLASSOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - PyObject_HEAD - PyObject *cl_bases; /* A tuple of class objects */ - PyObject *cl_dict; /* A dictionary */ - PyObject *cl_name; /* A string */ - /* The following three are functions or NULL */ - PyObject *cl_getattr; - PyObject *cl_setattr; - PyObject *cl_delattr; - PyObject *cl_weakreflist; /* List of weak references */ -} PyClassObject; - -typedef struct { - PyObject_HEAD - PyClassObject *in_class; /* The class object */ - PyObject *in_dict; /* A dictionary */ - PyObject *in_weakreflist; /* List of weak references */ -} PyInstanceObject; - -typedef struct { - PyObject_HEAD - PyObject *im_func; /* The callable object implementing the method */ - PyObject *im_self; /* The instance it is bound to, or NULL */ - PyObject *im_class; /* The class that asked for the method */ - PyObject *im_weakreflist; /* List of weak references */ -} PyMethodObject; - -PyAPI_DATA(PyTypeObject) PyClass_Type, PyInstance_Type, PyMethod_Type; - -#define PyClass_Check(op) ((op)->ob_type == &PyClass_Type) -#define PyInstance_Check(op) ((op)->ob_type == &PyInstance_Type) -#define PyMethod_Check(op) ((op)->ob_type == &PyMethod_Type) - -PyAPI_FUNC(PyObject *) PyClass_New(PyObject *, PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) PyInstance_New(PyObject *, PyObject *, - PyObject *); -PyAPI_FUNC(PyObject *) PyInstance_NewRaw(PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) PyMethod_New(PyObject *, PyObject *, PyObject *); - -PyAPI_FUNC(PyObject *) PyMethod_Function(PyObject *); -PyAPI_FUNC(PyObject *) PyMethod_Self(PyObject *); -PyAPI_FUNC(PyObject *) PyMethod_Class(PyObject *); - -/* Look up attribute with name (a string) on instance object pinst, using - * only the instance and base class dicts. If a descriptor is found in - * a class dict, the descriptor is returned without calling it. - * Returns NULL if nothing found, else a borrowed reference to the - * value associated with name in the dict in which name was found. - * The point of this routine is that it never calls arbitrary Python - * code, so is always "safe": all it does is dict lookups. The function - * can't fail, never sets an exception, and NULL is not an error (it just - * means "not found"). - */ -PyAPI_FUNC(PyObject *) _PyInstance_Lookup(PyObject *pinst, PyObject *name); - -/* Macros for direct access to these values. Type checks are *not* - done, so use with care. */ -#define PyMethod_GET_FUNCTION(meth) \ - (((PyMethodObject *)meth) -> im_func) -#define PyMethod_GET_SELF(meth) \ - (((PyMethodObject *)meth) -> im_self) -#define PyMethod_GET_CLASS(meth) \ - (((PyMethodObject *)meth) -> im_class) - -PyAPI_FUNC(int) PyClass_IsSubclass(PyObject *, PyObject *); - -PyAPI_FUNC(int) PyMethod_ClearFreeList(void); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_CLASSOBJECT_H */
--- a/test/include/python2.7/cobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ -/* - CObjects are marked Pending Deprecation as of Python 2.7. - The full schedule for 2.x is as follows: - - CObjects are marked Pending Deprecation in Python 2.7. - - CObjects will be marked Deprecated in Python 2.8 - (if there is one). - - CObjects will be removed in Python 2.9 (if there is one). - - Additionally, for the Python 3.x series: - - CObjects were marked Deprecated in Python 3.1. - - CObjects will be removed in Python 3.2. - - You should switch all use of CObjects to capsules. Capsules - have a safer and more consistent API. For more information, - see Include/pycapsule.h, or read the "Capsules" topic in - the "Python/C API Reference Manual". - - Python 2.7 no longer uses CObjects itself; all objects which - were formerly CObjects are now capsules. Note that this change - does not by itself break binary compatibility with extensions - built for previous versions of Python--PyCObject_AsVoidPtr() - has been changed to also understand capsules. - -*/ - -/* original file header comment follows: */ - -/* C objects to be exported from one extension module to another. - - C objects are used for communication between extension modules. - They provide a way for an extension module to export a C interface - to other extension modules, so that extension modules can use the - Python import mechanism to link to one another. - -*/ - -#ifndef Py_COBJECT_H -#define Py_COBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_DATA(PyTypeObject) PyCObject_Type; - -#define PyCObject_Check(op) (Py_TYPE(op) == &PyCObject_Type) - -/* Create a PyCObject from a pointer to a C object and an optional - destructor function. If the second argument is non-null, then it - will be called with the first argument if and when the PyCObject is - destroyed. - -*/ -PyAPI_FUNC(PyObject *) PyCObject_FromVoidPtr( - void *cobj, void (*destruct)(void*)); - - -/* Create a PyCObject from a pointer to a C object, a description object, - and an optional destructor function. If the third argument is non-null, - then it will be called with the first and second arguments if and when - the PyCObject is destroyed. -*/ -PyAPI_FUNC(PyObject *) PyCObject_FromVoidPtrAndDesc( - void *cobj, void *desc, void (*destruct)(void*,void*)); - -/* Retrieve a pointer to a C object from a PyCObject. */ -PyAPI_FUNC(void *) PyCObject_AsVoidPtr(PyObject *); - -/* Retrieve a pointer to a description object from a PyCObject. */ -PyAPI_FUNC(void *) PyCObject_GetDesc(PyObject *); - -/* Import a pointer to a C object from a module using a PyCObject. */ -PyAPI_FUNC(void *) PyCObject_Import(char *module_name, char *cobject_name); - -/* Modify a C object. Fails (==0) if object has a destructor. */ -PyAPI_FUNC(int) PyCObject_SetVoidPtr(PyObject *self, void *cobj); - - -typedef struct { - PyObject_HEAD - void *cobject; - void *desc; - void (*destructor)(void *); -} PyCObject; - - -#ifdef __cplusplus -} -#endif -#endif /* !Py_COBJECT_H */
--- a/test/include/python2.7/code.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,116 +0,0 @@ -/* Definitions for bytecode */ - -#ifndef Py_CODE_H -#define Py_CODE_H -#ifdef __cplusplus -extern "C" { -#endif - -/* Bytecode object */ -typedef struct { - PyObject_HEAD - int co_argcount; /* #arguments, except *args */ - int co_nlocals; /* #local variables */ - int co_stacksize; /* #entries needed for evaluation stack */ - int co_flags; /* CO_..., see below */ - PyObject *co_code; /* instruction opcodes */ - PyObject *co_consts; /* list (constants used) */ - PyObject *co_names; /* list of strings (names used) */ - PyObject *co_varnames; /* tuple of strings (local variable names) */ - PyObject *co_freevars; /* tuple of strings (free variable names) */ - PyObject *co_cellvars; /* tuple of strings (cell variable names) */ - /* The rest doesn't count for hash/cmp */ - PyObject *co_filename; /* string (where it was loaded from) */ - PyObject *co_name; /* string (name, for reference) */ - int co_firstlineno; /* first source line number */ - PyObject *co_lnotab; /* string (encoding addr<->lineno mapping) See - Objects/lnotab_notes.txt for details. */ - void *co_zombieframe; /* for optimization only (see frameobject.c) */ - PyObject *co_weakreflist; /* to support weakrefs to code objects */ -} PyCodeObject; - -/* Masks for co_flags above */ -#define CO_OPTIMIZED 0x0001 -#define CO_NEWLOCALS 0x0002 -#define CO_VARARGS 0x0004 -#define CO_VARKEYWORDS 0x0008 -#define CO_NESTED 0x0010 -#define CO_GENERATOR 0x0020 -/* The CO_NOFREE flag is set if there are no free or cell variables. - This information is redundant, but it allows a single flag test - to determine whether there is any extra work to be done when the - call frame it setup. -*/ -#define CO_NOFREE 0x0040 - -#if 0 -/* This is no longer used. Stopped defining in 2.5, do not re-use. */ -#define CO_GENERATOR_ALLOWED 0x1000 -#endif -#define CO_FUTURE_DIVISION 0x2000 -#define CO_FUTURE_ABSOLUTE_IMPORT 0x4000 /* do absolute imports by default */ -#define CO_FUTURE_WITH_STATEMENT 0x8000 -#define CO_FUTURE_PRINT_FUNCTION 0x10000 -#define CO_FUTURE_UNICODE_LITERALS 0x20000 - -/* This should be defined if a future statement modifies the syntax. - For example, when a keyword is added. -*/ -#if 1 -#define PY_PARSER_REQUIRES_FUTURE_KEYWORD -#endif - -#define CO_MAXBLOCKS 20 /* Max static block nesting within a function */ - -PyAPI_DATA(PyTypeObject) PyCode_Type; - -#define PyCode_Check(op) (Py_TYPE(op) == &PyCode_Type) -#define PyCode_GetNumFree(op) (PyTuple_GET_SIZE((op)->co_freevars)) - -/* Public interface */ -PyAPI_FUNC(PyCodeObject *) PyCode_New( - int, int, int, int, PyObject *, PyObject *, PyObject *, PyObject *, - PyObject *, PyObject *, PyObject *, PyObject *, int, PyObject *); - /* same as struct above */ - -/* Creates a new empty code object with the specified source location. */ -PyAPI_FUNC(PyCodeObject *) -PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno); - -/* Return the line number associated with the specified bytecode index - in this code object. If you just need the line number of a frame, - use PyFrame_GetLineNumber() instead. */ -PyAPI_FUNC(int) PyCode_Addr2Line(PyCodeObject *, int); - -/* for internal use only */ -#define _PyCode_GETCODEPTR(co, pp) \ - ((*Py_TYPE((co)->co_code)->tp_as_buffer->bf_getreadbuffer) \ - ((co)->co_code, 0, (void **)(pp))) - -typedef struct _addr_pair { - int ap_lower; - int ap_upper; -} PyAddrPair; - -/* Update *bounds to describe the first and one-past-the-last instructions in the - same line as lasti. Return the number of that line. -*/ -PyAPI_FUNC(int) _PyCode_CheckLineNumber(PyCodeObject* co, - int lasti, PyAddrPair *bounds); - -/* Create a comparable key used to compare constants taking in account the - * object type. It is used to make sure types are not coerced (e.g., float and - * complex) _and_ to distinguish 0.0 from -0.0 e.g. on IEEE platforms - * - * Return (type(obj), obj, ...): a tuple with variable size (at least 2 items) - * depending on the type and the value. The type is the first item to not - * compare bytes and str which can raise a BytesWarning exception. */ -PyAPI_FUNC(PyObject*) _PyCode_ConstantKey(PyObject *obj); - -PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts, - PyObject *names, PyObject *lineno_obj); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_CODE_H */
--- a/test/include/python2.7/codecs.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,212 +0,0 @@ -#ifndef Py_CODECREGISTRY_H -#define Py_CODECREGISTRY_H -#ifdef __cplusplus -extern "C" { -#endif - -/* ------------------------------------------------------------------------ - - Python Codec Registry and support functions - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -Copyright (c) Corporation for National Research Initiatives. - - ------------------------------------------------------------------------ */ - -/* Register a new codec search function. - - As side effect, this tries to load the encodings package, if not - yet done, to make sure that it is always first in the list of - search functions. - - The search_function's refcount is incremented by this function. */ - -PyAPI_FUNC(int) PyCodec_Register( - PyObject *search_function - ); - -/* Codec register lookup API. - - Looks up the given encoding and returns a CodecInfo object with - function attributes which implement the different aspects of - processing the encoding. - - The encoding string is looked up converted to all lower-case - characters. This makes encodings looked up through this mechanism - effectively case-insensitive. - - If no codec is found, a KeyError is set and NULL returned. - - As side effect, this tries to load the encodings package, if not - yet done. This is part of the lazy load strategy for the encodings - package. - - */ - -PyAPI_FUNC(PyObject *) _PyCodec_Lookup( - const char *encoding - ); - -/* Generic codec based encoding API. - - object is passed through the encoder function found for the given - encoding using the error handling method defined by errors. errors - may be NULL to use the default method defined for the codec. - - Raises a LookupError in case no encoder can be found. - - */ - -PyAPI_FUNC(PyObject *) PyCodec_Encode( - PyObject *object, - const char *encoding, - const char *errors - ); - -/* Generic codec based decoding API. - - object is passed through the decoder function found for the given - encoding using the error handling method defined by errors. errors - may be NULL to use the default method defined for the codec. - - Raises a LookupError in case no encoder can be found. - - */ - -PyAPI_FUNC(PyObject *) PyCodec_Decode( - PyObject *object, - const char *encoding, - const char *errors - ); - -/* Text codec specific encoding and decoding API. - - Checks the encoding against a list of codecs which do not - implement a unicode<->bytes encoding before attempting the - operation. - - Please note that these APIs are internal and should not - be used in Python C extensions. - - XXX (ncoghlan): should we make these, or something like them, public - in Python 3.5+? - - */ -PyAPI_FUNC(PyObject *) _PyCodec_LookupTextEncoding( - const char *encoding, - const char *alternate_command - ); - -PyAPI_FUNC(PyObject *) _PyCodec_EncodeText( - PyObject *object, - const char *encoding, - const char *errors - ); - -PyAPI_FUNC(PyObject *) _PyCodec_DecodeText( - PyObject *object, - const char *encoding, - const char *errors - ); - -/* These two aren't actually text encoding specific, but _io.TextIOWrapper - * is the only current API consumer. - */ -PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalDecoder( - PyObject *codec_info, - const char *errors - ); - -PyAPI_FUNC(PyObject *) _PyCodecInfo_GetIncrementalEncoder( - PyObject *codec_info, - const char *errors - ); - - - -/* --- Codec Lookup APIs -------------------------------------------------- - - All APIs return a codec object with incremented refcount and are - based on _PyCodec_Lookup(). The same comments w/r to the encoding - name also apply to these APIs. - -*/ - -/* Get an encoder function for the given encoding. */ - -PyAPI_FUNC(PyObject *) PyCodec_Encoder( - const char *encoding - ); - -/* Get a decoder function for the given encoding. */ - -PyAPI_FUNC(PyObject *) PyCodec_Decoder( - const char *encoding - ); - -/* Get a IncrementalEncoder object for the given encoding. */ - -PyAPI_FUNC(PyObject *) PyCodec_IncrementalEncoder( - const char *encoding, - const char *errors - ); - -/* Get a IncrementalDecoder object function for the given encoding. */ - -PyAPI_FUNC(PyObject *) PyCodec_IncrementalDecoder( - const char *encoding, - const char *errors - ); - -/* Get a StreamReader factory function for the given encoding. */ - -PyAPI_FUNC(PyObject *) PyCodec_StreamReader( - const char *encoding, - PyObject *stream, - const char *errors - ); - -/* Get a StreamWriter factory function for the given encoding. */ - -PyAPI_FUNC(PyObject *) PyCodec_StreamWriter( - const char *encoding, - PyObject *stream, - const char *errors - ); - -/* Unicode encoding error handling callback registry API */ - -/* Register the error handling callback function error under the given - name. This function will be called by the codec when it encounters - unencodable characters/undecodable bytes and doesn't know the - callback name, when name is specified as the error parameter - in the call to the encode/decode function. - Return 0 on success, -1 on error */ -PyAPI_FUNC(int) PyCodec_RegisterError(const char *name, PyObject *error); - -/* Lookup the error handling callback function registered under the given - name. As a special case NULL can be passed, in which case - the error handling callback for "strict" will be returned. */ -PyAPI_FUNC(PyObject *) PyCodec_LookupError(const char *name); - -/* raise exc as an exception */ -PyAPI_FUNC(PyObject *) PyCodec_StrictErrors(PyObject *exc); - -/* ignore the unicode error, skipping the faulty input */ -PyAPI_FUNC(PyObject *) PyCodec_IgnoreErrors(PyObject *exc); - -/* replace the unicode encode error with ? or U+FFFD */ -PyAPI_FUNC(PyObject *) PyCodec_ReplaceErrors(PyObject *exc); - -/* replace the unicode encode error with XML character references */ -PyAPI_FUNC(PyObject *) PyCodec_XMLCharRefReplaceErrors(PyObject *exc); - -/* replace the unicode encode error with backslash escapes (\x, \u and \U) */ -PyAPI_FUNC(PyObject *) PyCodec_BackslashReplaceErrors(PyObject *exc); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_CODECREGISTRY_H */
--- a/test/include/python2.7/compile.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ - -#ifndef Py_COMPILE_H -#define Py_COMPILE_H - -#include "code.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Public interface */ -struct _node; /* Declare the existence of this type */ -PyAPI_FUNC(PyCodeObject *) PyNode_Compile(struct _node *, const char *); - -/* Future feature support */ - -typedef struct { - int ff_features; /* flags set by future statements */ - int ff_lineno; /* line number of last future statement */ -} PyFutureFeatures; - -#define FUTURE_NESTED_SCOPES "nested_scopes" -#define FUTURE_GENERATORS "generators" -#define FUTURE_DIVISION "division" -#define FUTURE_ABSOLUTE_IMPORT "absolute_import" -#define FUTURE_WITH_STATEMENT "with_statement" -#define FUTURE_PRINT_FUNCTION "print_function" -#define FUTURE_UNICODE_LITERALS "unicode_literals" - - -struct _mod; /* Declare the existence of this type */ -PyAPI_FUNC(PyCodeObject *) PyAST_Compile(struct _mod *, const char *, - PyCompilerFlags *, PyArena *); -PyAPI_FUNC(PyFutureFeatures *) PyFuture_FromAST(struct _mod *, const char *); - - -#ifdef __cplusplus -} -#endif -#endif /* !Py_COMPILE_H */
--- a/test/include/python2.7/complexobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,66 +0,0 @@ -/* Complex number structure */ - -#ifndef Py_COMPLEXOBJECT_H -#define Py_COMPLEXOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - double real; - double imag; -} Py_complex; - -/* Operations on complex numbers from complexmodule.c */ - -#define c_sum _Py_c_sum -#define c_diff _Py_c_diff -#define c_neg _Py_c_neg -#define c_prod _Py_c_prod -#define c_quot _Py_c_quot -#define c_pow _Py_c_pow -#define c_abs _Py_c_abs - -PyAPI_FUNC(Py_complex) c_sum(Py_complex, Py_complex); -PyAPI_FUNC(Py_complex) c_diff(Py_complex, Py_complex); -PyAPI_FUNC(Py_complex) c_neg(Py_complex); -PyAPI_FUNC(Py_complex) c_prod(Py_complex, Py_complex); -PyAPI_FUNC(Py_complex) c_quot(Py_complex, Py_complex); -PyAPI_FUNC(Py_complex) c_pow(Py_complex, Py_complex); -PyAPI_FUNC(double) c_abs(Py_complex); - - -/* Complex object interface */ - -/* -PyComplexObject represents a complex number with double-precision -real and imaginary parts. -*/ - -typedef struct { - PyObject_HEAD - Py_complex cval; -} PyComplexObject; - -PyAPI_DATA(PyTypeObject) PyComplex_Type; - -#define PyComplex_Check(op) PyObject_TypeCheck(op, &PyComplex_Type) -#define PyComplex_CheckExact(op) (Py_TYPE(op) == &PyComplex_Type) - -PyAPI_FUNC(PyObject *) PyComplex_FromCComplex(Py_complex); -PyAPI_FUNC(PyObject *) PyComplex_FromDoubles(double real, double imag); - -PyAPI_FUNC(double) PyComplex_RealAsDouble(PyObject *op); -PyAPI_FUNC(double) PyComplex_ImagAsDouble(PyObject *op); -PyAPI_FUNC(Py_complex) PyComplex_AsCComplex(PyObject *op); - -/* Format the object based on the format_spec, as defined in PEP 3101 - (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyComplex_FormatAdvanced(PyObject *obj, - char *format_spec, - Py_ssize_t format_spec_len); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_COMPLEXOBJECT_H */
--- a/test/include/python2.7/datetime.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,239 +0,0 @@ -/* datetime.h - */ - -#ifndef DATETIME_H -#define DATETIME_H -#ifdef __cplusplus -extern "C" { -#endif - -/* Fields are packed into successive bytes, each viewed as unsigned and - * big-endian, unless otherwise noted: - * - * byte offset - * 0 year 2 bytes, 1-9999 - * 2 month 1 byte, 1-12 - * 3 day 1 byte, 1-31 - * 4 hour 1 byte, 0-23 - * 5 minute 1 byte, 0-59 - * 6 second 1 byte, 0-59 - * 7 usecond 3 bytes, 0-999999 - * 10 - */ - -/* # of bytes for year, month, and day. */ -#define _PyDateTime_DATE_DATASIZE 4 - -/* # of bytes for hour, minute, second, and usecond. */ -#define _PyDateTime_TIME_DATASIZE 6 - -/* # of bytes for year, month, day, hour, minute, second, and usecond. */ -#define _PyDateTime_DATETIME_DATASIZE 10 - - -typedef struct -{ - PyObject_HEAD - long hashcode; /* -1 when unknown */ - int days; /* -MAX_DELTA_DAYS <= days <= MAX_DELTA_DAYS */ - int seconds; /* 0 <= seconds < 24*3600 is invariant */ - int microseconds; /* 0 <= microseconds < 1000000 is invariant */ -} PyDateTime_Delta; - -typedef struct -{ - PyObject_HEAD /* a pure abstract base class */ -} PyDateTime_TZInfo; - - -/* The datetime and time types have hashcodes, and an optional tzinfo member, - * present if and only if hastzinfo is true. - */ -#define _PyTZINFO_HEAD \ - PyObject_HEAD \ - long hashcode; \ - char hastzinfo; /* boolean flag */ - -/* No _PyDateTime_BaseTZInfo is allocated; it's just to have something - * convenient to cast to, when getting at the hastzinfo member of objects - * starting with _PyTZINFO_HEAD. - */ -typedef struct -{ - _PyTZINFO_HEAD -} _PyDateTime_BaseTZInfo; - -/* All time objects are of PyDateTime_TimeType, but that can be allocated - * in two ways, with or without a tzinfo member. Without is the same as - * tzinfo == None, but consumes less memory. _PyDateTime_BaseTime is an - * internal struct used to allocate the right amount of space for the - * "without" case. - */ -#define _PyDateTime_TIMEHEAD \ - _PyTZINFO_HEAD \ - unsigned char data[_PyDateTime_TIME_DATASIZE]; - -typedef struct -{ - _PyDateTime_TIMEHEAD -} _PyDateTime_BaseTime; /* hastzinfo false */ - -typedef struct -{ - _PyDateTime_TIMEHEAD - PyObject *tzinfo; -} PyDateTime_Time; /* hastzinfo true */ - - -/* All datetime objects are of PyDateTime_DateTimeType, but that can be - * allocated in two ways too, just like for time objects above. In addition, - * the plain date type is a base class for datetime, so it must also have - * a hastzinfo member (although it's unused there). - */ -typedef struct -{ - _PyTZINFO_HEAD - unsigned char data[_PyDateTime_DATE_DATASIZE]; -} PyDateTime_Date; - -#define _PyDateTime_DATETIMEHEAD \ - _PyTZINFO_HEAD \ - unsigned char data[_PyDateTime_DATETIME_DATASIZE]; - -typedef struct -{ - _PyDateTime_DATETIMEHEAD -} _PyDateTime_BaseDateTime; /* hastzinfo false */ - -typedef struct -{ - _PyDateTime_DATETIMEHEAD - PyObject *tzinfo; -} PyDateTime_DateTime; /* hastzinfo true */ - - -/* Apply for date and datetime instances. */ -#define PyDateTime_GET_YEAR(o) ((((PyDateTime_Date*)o)->data[0] << 8) | \ - ((PyDateTime_Date*)o)->data[1]) -#define PyDateTime_GET_MONTH(o) (((PyDateTime_Date*)o)->data[2]) -#define PyDateTime_GET_DAY(o) (((PyDateTime_Date*)o)->data[3]) - -#define PyDateTime_DATE_GET_HOUR(o) (((PyDateTime_DateTime*)o)->data[4]) -#define PyDateTime_DATE_GET_MINUTE(o) (((PyDateTime_DateTime*)o)->data[5]) -#define PyDateTime_DATE_GET_SECOND(o) (((PyDateTime_DateTime*)o)->data[6]) -#define PyDateTime_DATE_GET_MICROSECOND(o) \ - ((((PyDateTime_DateTime*)o)->data[7] << 16) | \ - (((PyDateTime_DateTime*)o)->data[8] << 8) | \ - ((PyDateTime_DateTime*)o)->data[9]) - -/* Apply for time instances. */ -#define PyDateTime_TIME_GET_HOUR(o) (((PyDateTime_Time*)o)->data[0]) -#define PyDateTime_TIME_GET_MINUTE(o) (((PyDateTime_Time*)o)->data[1]) -#define PyDateTime_TIME_GET_SECOND(o) (((PyDateTime_Time*)o)->data[2]) -#define PyDateTime_TIME_GET_MICROSECOND(o) \ - ((((PyDateTime_Time*)o)->data[3] << 16) | \ - (((PyDateTime_Time*)o)->data[4] << 8) | \ - ((PyDateTime_Time*)o)->data[5]) - - -/* Define structure for C API. */ -typedef struct { - /* type objects */ - PyTypeObject *DateType; - PyTypeObject *DateTimeType; - PyTypeObject *TimeType; - PyTypeObject *DeltaType; - PyTypeObject *TZInfoType; - - /* constructors */ - PyObject *(*Date_FromDate)(int, int, int, PyTypeObject*); - PyObject *(*DateTime_FromDateAndTime)(int, int, int, int, int, int, int, - PyObject*, PyTypeObject*); - PyObject *(*Time_FromTime)(int, int, int, int, PyObject*, PyTypeObject*); - PyObject *(*Delta_FromDelta)(int, int, int, int, PyTypeObject*); - - /* constructors for the DB API */ - PyObject *(*DateTime_FromTimestamp)(PyObject*, PyObject*, PyObject*); - PyObject *(*Date_FromTimestamp)(PyObject*, PyObject*); - -} PyDateTime_CAPI; - -#define PyDateTime_CAPSULE_NAME "datetime.datetime_CAPI" - - -/* "magic" constant used to partially protect against developer mistakes. */ -#define DATETIME_API_MAGIC 0x414548d5 - -#ifdef Py_BUILD_CORE - -/* Macros for type checking when building the Python core. */ -#define PyDate_Check(op) PyObject_TypeCheck(op, &PyDateTime_DateType) -#define PyDate_CheckExact(op) (Py_TYPE(op) == &PyDateTime_DateType) - -#define PyDateTime_Check(op) PyObject_TypeCheck(op, &PyDateTime_DateTimeType) -#define PyDateTime_CheckExact(op) (Py_TYPE(op) == &PyDateTime_DateTimeType) - -#define PyTime_Check(op) PyObject_TypeCheck(op, &PyDateTime_TimeType) -#define PyTime_CheckExact(op) (Py_TYPE(op) == &PyDateTime_TimeType) - -#define PyDelta_Check(op) PyObject_TypeCheck(op, &PyDateTime_DeltaType) -#define PyDelta_CheckExact(op) (Py_TYPE(op) == &PyDateTime_DeltaType) - -#define PyTZInfo_Check(op) PyObject_TypeCheck(op, &PyDateTime_TZInfoType) -#define PyTZInfo_CheckExact(op) (Py_TYPE(op) == &PyDateTime_TZInfoType) - -#else - -/* Define global variable for the C API and a macro for setting it. */ -static PyDateTime_CAPI *PyDateTimeAPI = NULL; - -#define PyDateTime_IMPORT \ - PyDateTimeAPI = (PyDateTime_CAPI *)PyCapsule_Import(PyDateTime_CAPSULE_NAME, 0) - -/* Macros for type checking when not building the Python core. */ -#define PyDate_Check(op) PyObject_TypeCheck(op, PyDateTimeAPI->DateType) -#define PyDate_CheckExact(op) (Py_TYPE(op) == PyDateTimeAPI->DateType) - -#define PyDateTime_Check(op) PyObject_TypeCheck(op, PyDateTimeAPI->DateTimeType) -#define PyDateTime_CheckExact(op) (Py_TYPE(op) == PyDateTimeAPI->DateTimeType) - -#define PyTime_Check(op) PyObject_TypeCheck(op, PyDateTimeAPI->TimeType) -#define PyTime_CheckExact(op) (Py_TYPE(op) == PyDateTimeAPI->TimeType) - -#define PyDelta_Check(op) PyObject_TypeCheck(op, PyDateTimeAPI->DeltaType) -#define PyDelta_CheckExact(op) (Py_TYPE(op) == PyDateTimeAPI->DeltaType) - -#define PyTZInfo_Check(op) PyObject_TypeCheck(op, PyDateTimeAPI->TZInfoType) -#define PyTZInfo_CheckExact(op) (Py_TYPE(op) == PyDateTimeAPI->TZInfoType) - -/* Macros for accessing constructors in a simplified fashion. */ -#define PyDate_FromDate(year, month, day) \ - PyDateTimeAPI->Date_FromDate(year, month, day, PyDateTimeAPI->DateType) - -#define PyDateTime_FromDateAndTime(year, month, day, hour, min, sec, usec) \ - PyDateTimeAPI->DateTime_FromDateAndTime(year, month, day, hour, \ - min, sec, usec, Py_None, PyDateTimeAPI->DateTimeType) - -#define PyTime_FromTime(hour, minute, second, usecond) \ - PyDateTimeAPI->Time_FromTime(hour, minute, second, usecond, \ - Py_None, PyDateTimeAPI->TimeType) - -#define PyDelta_FromDSU(days, seconds, useconds) \ - PyDateTimeAPI->Delta_FromDelta(days, seconds, useconds, 1, \ - PyDateTimeAPI->DeltaType) - -/* Macros supporting the DB API. */ -#define PyDateTime_FromTimestamp(args) \ - PyDateTimeAPI->DateTime_FromTimestamp( \ - (PyObject*) (PyDateTimeAPI->DateTimeType), args, NULL) - -#define PyDate_FromTimestamp(args) \ - PyDateTimeAPI->Date_FromTimestamp( \ - (PyObject*) (PyDateTimeAPI->DateType), args) - -#endif /* Py_BUILD_CORE */ - -#ifdef __cplusplus -} -#endif -#endif
--- a/test/include/python2.7/descrobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,94 +0,0 @@ -/* Descriptors */ -#ifndef Py_DESCROBJECT_H -#define Py_DESCROBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef PyObject *(*getter)(PyObject *, void *); -typedef int (*setter)(PyObject *, PyObject *, void *); - -typedef struct PyGetSetDef { - char *name; - getter get; - setter set; - char *doc; - void *closure; -} PyGetSetDef; - -typedef PyObject *(*wrapperfunc)(PyObject *self, PyObject *args, - void *wrapped); - -typedef PyObject *(*wrapperfunc_kwds)(PyObject *self, PyObject *args, - void *wrapped, PyObject *kwds); - -struct wrapperbase { - char *name; - int offset; - void *function; - wrapperfunc wrapper; - char *doc; - int flags; - PyObject *name_strobj; -}; - -/* Flags for above struct */ -#define PyWrapperFlag_KEYWORDS 1 /* wrapper function takes keyword args */ - -/* Various kinds of descriptor objects */ - -#define PyDescr_COMMON \ - PyObject_HEAD \ - PyTypeObject *d_type; \ - PyObject *d_name - -typedef struct { - PyDescr_COMMON; -} PyDescrObject; - -typedef struct { - PyDescr_COMMON; - PyMethodDef *d_method; -} PyMethodDescrObject; - -typedef struct { - PyDescr_COMMON; - struct PyMemberDef *d_member; -} PyMemberDescrObject; - -typedef struct { - PyDescr_COMMON; - PyGetSetDef *d_getset; -} PyGetSetDescrObject; - -typedef struct { - PyDescr_COMMON; - struct wrapperbase *d_base; - void *d_wrapped; /* This can be any function pointer */ -} PyWrapperDescrObject; - -PyAPI_DATA(PyTypeObject) PyWrapperDescr_Type; -PyAPI_DATA(PyTypeObject) PyDictProxy_Type; -PyAPI_DATA(PyTypeObject) PyGetSetDescr_Type; -PyAPI_DATA(PyTypeObject) PyMemberDescr_Type; - -PyAPI_FUNC(PyObject *) PyDescr_NewMethod(PyTypeObject *, PyMethodDef *); -PyAPI_FUNC(PyObject *) PyDescr_NewClassMethod(PyTypeObject *, PyMethodDef *); -PyAPI_FUNC(PyObject *) PyDescr_NewMember(PyTypeObject *, - struct PyMemberDef *); -PyAPI_FUNC(PyObject *) PyDescr_NewGetSet(PyTypeObject *, - struct PyGetSetDef *); -PyAPI_FUNC(PyObject *) PyDescr_NewWrapper(PyTypeObject *, - struct wrapperbase *, void *); -#define PyDescr_IsData(d) (Py_TYPE(d)->tp_descr_set != NULL) - -PyAPI_FUNC(PyObject *) PyDictProxy_New(PyObject *); -PyAPI_FUNC(PyObject *) PyWrapper_New(PyObject *, PyObject *); - - -PyAPI_DATA(PyTypeObject) PyProperty_Type; -#ifdef __cplusplus -} -#endif -#endif /* !Py_DESCROBJECT_H */ -
--- a/test/include/python2.7/dictobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,157 +0,0 @@ -#ifndef Py_DICTOBJECT_H -#define Py_DICTOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* Dictionary object type -- mapping from hashable object to object */ - -/* The distribution includes a separate file, Objects/dictnotes.txt, - describing explorations into dictionary design and optimization. - It covers typical dictionary use patterns, the parameters for - tuning dictionaries, and several ideas for possible optimizations. -*/ - -/* -There are three kinds of slots in the table: - -1. Unused. me_key == me_value == NULL - Does not hold an active (key, value) pair now and never did. Unused can - transition to Active upon key insertion. This is the only case in which - me_key is NULL, and is each slot's initial state. - -2. Active. me_key != NULL and me_key != dummy and me_value != NULL - Holds an active (key, value) pair. Active can transition to Dummy upon - key deletion. This is the only case in which me_value != NULL. - -3. Dummy. me_key == dummy and me_value == NULL - Previously held an active (key, value) pair, but that was deleted and an - active pair has not yet overwritten the slot. Dummy can transition to - Active upon key insertion. Dummy slots cannot be made Unused again - (cannot have me_key set to NULL), else the probe sequence in case of - collision would have no way to know they were once active. - -Note: .popitem() abuses the me_hash field of an Unused or Dummy slot to -hold a search finger. The me_hash field of Unused or Dummy slots has no -meaning otherwise. -*/ - -/* PyDict_MINSIZE is the minimum size of a dictionary. This many slots are - * allocated directly in the dict object (in the ma_smalltable member). - * It must be a power of 2, and at least 4. 8 allows dicts with no more - * than 5 active entries to live in ma_smalltable (and so avoid an - * additional malloc); instrumentation suggested this suffices for the - * majority of dicts (consisting mostly of usually-small instance dicts and - * usually-small dicts created to pass keyword arguments). - */ -#define PyDict_MINSIZE 8 - -typedef struct { - /* Cached hash code of me_key. Note that hash codes are C longs. - * We have to use Py_ssize_t instead because dict_popitem() abuses - * me_hash to hold a search finger. - */ - Py_ssize_t me_hash; - PyObject *me_key; - PyObject *me_value; -} PyDictEntry; - -/* -To ensure the lookup algorithm terminates, there must be at least one Unused -slot (NULL key) in the table. -The value ma_fill is the number of non-NULL keys (sum of Active and Dummy); -ma_used is the number of non-NULL, non-dummy keys (== the number of non-NULL -values == the number of Active items). -To avoid slowing down lookups on a near-full table, we resize the table when -it's two-thirds full. -*/ -typedef struct _dictobject PyDictObject; -struct _dictobject { - PyObject_HEAD - Py_ssize_t ma_fill; /* # Active + # Dummy */ - Py_ssize_t ma_used; /* # Active */ - - /* The table contains ma_mask + 1 slots, and that's a power of 2. - * We store the mask instead of the size because the mask is more - * frequently needed. - */ - Py_ssize_t ma_mask; - - /* ma_table points to ma_smalltable for small tables, else to - * additional malloc'ed memory. ma_table is never NULL! This rule - * saves repeated runtime null-tests in the workhorse getitem and - * setitem calls. - */ - PyDictEntry *ma_table; - PyDictEntry *(*ma_lookup)(PyDictObject *mp, PyObject *key, long hash); - PyDictEntry ma_smalltable[PyDict_MINSIZE]; -}; - -PyAPI_DATA(PyTypeObject) PyDict_Type; -PyAPI_DATA(PyTypeObject) PyDictIterKey_Type; -PyAPI_DATA(PyTypeObject) PyDictIterValue_Type; -PyAPI_DATA(PyTypeObject) PyDictIterItem_Type; -PyAPI_DATA(PyTypeObject) PyDictKeys_Type; -PyAPI_DATA(PyTypeObject) PyDictItems_Type; -PyAPI_DATA(PyTypeObject) PyDictValues_Type; - -#define PyDict_Check(op) \ - PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_DICT_SUBCLASS) -#define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type) -#define PyDictKeys_Check(op) (Py_TYPE(op) == &PyDictKeys_Type) -#define PyDictItems_Check(op) (Py_TYPE(op) == &PyDictItems_Type) -#define PyDictValues_Check(op) (Py_TYPE(op) == &PyDictValues_Type) -/* This excludes Values, since they are not sets. */ -# define PyDictViewSet_Check(op) \ - (PyDictKeys_Check(op) || PyDictItems_Check(op)) - -PyAPI_FUNC(PyObject *) PyDict_New(void); -PyAPI_FUNC(PyObject *) PyDict_GetItem(PyObject *mp, PyObject *key); -PyAPI_FUNC(PyObject *) _PyDict_GetItemWithError(PyObject *mp, PyObject *key); -PyAPI_FUNC(int) PyDict_SetItem(PyObject *mp, PyObject *key, PyObject *item); -PyAPI_FUNC(int) PyDict_DelItem(PyObject *mp, PyObject *key); -PyAPI_FUNC(void) PyDict_Clear(PyObject *mp); -PyAPI_FUNC(int) PyDict_Next( - PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value); -PyAPI_FUNC(int) _PyDict_Next( - PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, long *hash); -PyAPI_FUNC(PyObject *) PyDict_Keys(PyObject *mp); -PyAPI_FUNC(PyObject *) PyDict_Values(PyObject *mp); -PyAPI_FUNC(PyObject *) PyDict_Items(PyObject *mp); -PyAPI_FUNC(Py_ssize_t) PyDict_Size(PyObject *mp); -PyAPI_FUNC(PyObject *) PyDict_Copy(PyObject *mp); -PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key); -PyAPI_FUNC(int) _PyDict_Contains(PyObject *mp, PyObject *key, long hash); -PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused); -PyAPI_FUNC(void) _PyDict_MaybeUntrack(PyObject *mp); - -/* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */ -PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other); - -/* PyDict_Merge updates/merges from a mapping object (an object that - supports PyMapping_Keys() and PyObject_GetItem()). If override is true, - the last occurrence of a key wins, else the first. The Python - dict.update(other) is equivalent to PyDict_Merge(dict, other, 1). -*/ -PyAPI_FUNC(int) PyDict_Merge(PyObject *mp, - PyObject *other, - int override); - -/* PyDict_MergeFromSeq2 updates/merges from an iterable object producing - iterable objects of length 2. If override is true, the last occurrence - of a key wins, else the first. The Python dict constructor dict(seq2) - is equivalent to dict={}; PyDict_MergeFromSeq(dict, seq2, 1). -*/ -PyAPI_FUNC(int) PyDict_MergeFromSeq2(PyObject *d, - PyObject *seq2, - int override); - -PyAPI_FUNC(PyObject *) PyDict_GetItemString(PyObject *dp, const char *key); -PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *item); -PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_DICTOBJECT_H */
--- a/test/include/python2.7/dtoa.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ -#ifndef PY_NO_SHORT_FLOAT_REPR -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_FUNC(double) _Py_dg_strtod(const char *str, char **ptr); -PyAPI_FUNC(char *) _Py_dg_dtoa(double d, int mode, int ndigits, - int *decpt, int *sign, char **rve); -PyAPI_FUNC(void) _Py_dg_freedtoa(char *s); - - -#ifdef __cplusplus -} -#endif -#endif
--- a/test/include/python2.7/enumobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ -#ifndef Py_ENUMOBJECT_H -#define Py_ENUMOBJECT_H - -/* Enumerate Object */ - -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_DATA(PyTypeObject) PyEnum_Type; -PyAPI_DATA(PyTypeObject) PyReversed_Type; - -#ifdef __cplusplus -} -#endif - -#endif /* !Py_ENUMOBJECT_H */
--- a/test/include/python2.7/errcode.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -#ifndef Py_ERRCODE_H -#define Py_ERRCODE_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* Error codes passed around between file input, tokenizer, parser and - interpreter. This is necessary so we can turn them into Python - exceptions at a higher level. Note that some errors have a - slightly different meaning when passed from the tokenizer to the - parser than when passed from the parser to the interpreter; e.g. - the parser only returns E_EOF when it hits EOF immediately, and it - never returns E_OK. */ - -#define E_OK 10 /* No error */ -#define E_EOF 11 /* End Of File */ -#define E_INTR 12 /* Interrupted */ -#define E_TOKEN 13 /* Bad token */ -#define E_SYNTAX 14 /* Syntax error */ -#define E_NOMEM 15 /* Ran out of memory */ -#define E_DONE 16 /* Parsing complete */ -#define E_ERROR 17 /* Execution error */ -#define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */ -#define E_OVERFLOW 19 /* Node had too many children */ -#define E_TOODEEP 20 /* Too many indentation levels */ -#define E_DEDENT 21 /* No matching outer block for dedent */ -#define E_DECODE 22 /* Error in decoding into Unicode */ -#define E_EOFS 23 /* EOF in triple-quoted string */ -#define E_EOLS 24 /* EOL in single-quoted string */ -#define E_LINECONT 25 /* Unexpected characters after a line continuation */ - -#ifdef __cplusplus -} -#endif -#endif /* !Py_ERRCODE_H */
--- a/test/include/python2.7/eval.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ - -/* Interface to execute compiled code */ - -#ifndef Py_EVAL_H -#define Py_EVAL_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_FUNC(PyObject *) PyEval_EvalCode(PyCodeObject *, PyObject *, PyObject *); - -PyAPI_FUNC(PyObject *) PyEval_EvalCodeEx(PyCodeObject *co, - PyObject *globals, - PyObject *locals, - PyObject **args, int argc, - PyObject **kwds, int kwdc, - PyObject **defs, int defc, - PyObject *closure); - -PyAPI_FUNC(PyObject *) _PyEval_CallTracing(PyObject *func, PyObject *args); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_EVAL_H */
--- a/test/include/python2.7/fileobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,97 +0,0 @@ - -/* File object interface */ - -#ifndef Py_FILEOBJECT_H -#define Py_FILEOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - PyObject_HEAD - FILE *f_fp; - PyObject *f_name; - PyObject *f_mode; - int (*f_close)(FILE *); - int f_softspace; /* Flag used by 'print' command */ - int f_binary; /* Flag which indicates whether the file is - open in binary (1) or text (0) mode */ - char* f_buf; /* Allocated readahead buffer */ - char* f_bufend; /* Points after last occupied position */ - char* f_bufptr; /* Current buffer position */ - char *f_setbuf; /* Buffer for setbuf(3) and setvbuf(3) */ - int f_univ_newline; /* Handle any newline convention */ - int f_newlinetypes; /* Types of newlines seen */ - int f_skipnextlf; /* Skip next \n */ - PyObject *f_encoding; - PyObject *f_errors; - PyObject *weakreflist; /* List of weak references */ - int unlocked_count; /* Num. currently running sections of code - using f_fp with the GIL released. */ - int readable; - int writable; -} PyFileObject; - -PyAPI_DATA(PyTypeObject) PyFile_Type; - -#define PyFile_Check(op) PyObject_TypeCheck(op, &PyFile_Type) -#define PyFile_CheckExact(op) (Py_TYPE(op) == &PyFile_Type) - -PyAPI_FUNC(PyObject *) PyFile_FromString(char *, char *); -PyAPI_FUNC(void) PyFile_SetBufSize(PyObject *, int); -PyAPI_FUNC(int) PyFile_SetEncoding(PyObject *, const char *); -PyAPI_FUNC(int) PyFile_SetEncodingAndErrors(PyObject *, const char *, char *errors); -PyAPI_FUNC(PyObject *) PyFile_FromFile(FILE *, char *, char *, - int (*)(FILE *)); -PyAPI_FUNC(FILE *) PyFile_AsFile(PyObject *); -PyAPI_FUNC(void) PyFile_IncUseCount(PyFileObject *); -PyAPI_FUNC(void) PyFile_DecUseCount(PyFileObject *); -PyAPI_FUNC(PyObject *) PyFile_Name(PyObject *); -PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int); -PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int); -PyAPI_FUNC(int) PyFile_SoftSpace(PyObject *, int); -PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *); -PyAPI_FUNC(int) PyObject_AsFileDescriptor(PyObject *); - -/* The default encoding used by the platform file system APIs - If non-NULL, this is different than the default encoding for strings -*/ -PyAPI_DATA(const char *) Py_FileSystemDefaultEncoding; - -/* Routines to replace fread() and fgets() which accept any of \r, \n - or \r\n as line terminators. -*/ -#define PY_STDIOTEXTMODE "b" -char *Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *); -size_t Py_UniversalNewlineFread(char *, size_t, FILE *, PyObject *); - -/* A routine to do sanity checking on the file mode string. returns - non-zero on if an exception occurred -*/ -int _PyFile_SanitizeMode(char *mode); - -#if defined _MSC_VER && _MSC_VER >= 1400 -/* A routine to check if a file descriptor is valid on Windows. Returns 0 - * and sets errno to EBADF if it isn't. This is to avoid Assertions - * from various functions in the Windows CRT beginning with - * Visual Studio 2005 - */ -int _PyVerify_fd(int fd); -#elif defined _MSC_VER && _MSC_VER >= 1200 -/* fdopen doesn't set errno EBADF and crashes for large fd on debug build */ -#define _PyVerify_fd(fd) (_get_osfhandle(fd) >= 0) -#else -#define _PyVerify_fd(A) (1) /* dummy */ -#endif - -/* A routine to check if a file descriptor can be select()-ed. */ -#ifdef HAVE_SELECT - #define _PyIsSelectable_fd(FD) (((FD) >= 0) && ((FD) < FD_SETSIZE)) -#else - #define _PyIsSelectable_fd(FD) (1) -#endif /* HAVE_SELECT */ - -#ifdef __cplusplus -} -#endif -#endif /* !Py_FILEOBJECT_H */
--- a/test/include/python2.7/floatobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,140 +0,0 @@ - -/* Float object interface */ - -/* -PyFloatObject represents a (double precision) floating point number. -*/ - -#ifndef Py_FLOATOBJECT_H -#define Py_FLOATOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - PyObject_HEAD - double ob_fval; -} PyFloatObject; - -PyAPI_DATA(PyTypeObject) PyFloat_Type; - -#define PyFloat_Check(op) PyObject_TypeCheck(op, &PyFloat_Type) -#define PyFloat_CheckExact(op) (Py_TYPE(op) == &PyFloat_Type) - -/* The str() precision PyFloat_STR_PRECISION is chosen so that in most cases, - the rounding noise created by various operations is suppressed, while - giving plenty of precision for practical use. */ - -#define PyFloat_STR_PRECISION 12 - -#ifdef Py_NAN -#define Py_RETURN_NAN return PyFloat_FromDouble(Py_NAN) -#endif - -#define Py_RETURN_INF(sign) do \ - if (copysign(1., sign) == 1.) { \ - return PyFloat_FromDouble(Py_HUGE_VAL); \ - } else { \ - return PyFloat_FromDouble(-Py_HUGE_VAL); \ - } while(0) - -PyAPI_FUNC(double) PyFloat_GetMax(void); -PyAPI_FUNC(double) PyFloat_GetMin(void); -PyAPI_FUNC(PyObject *) PyFloat_GetInfo(void); - -/* Return Python float from string PyObject. Second argument ignored on - input, and, if non-NULL, NULL is stored into *junk (this tried to serve a - purpose once but can't be made to work as intended). */ -PyAPI_FUNC(PyObject *) PyFloat_FromString(PyObject*, char** junk); - -/* Return Python float from C double. */ -PyAPI_FUNC(PyObject *) PyFloat_FromDouble(double); - -/* Extract C double from Python float. The macro version trades safety for - speed. */ -PyAPI_FUNC(double) PyFloat_AsDouble(PyObject *); -#define PyFloat_AS_DOUBLE(op) (((PyFloatObject *)(op))->ob_fval) - -/* Write repr(v) into the char buffer argument, followed by null byte. The - buffer must be "big enough"; >= 100 is very safe. - PyFloat_AsReprString(buf, x) strives to print enough digits so that - PyFloat_FromString(buf) then reproduces x exactly. */ -PyAPI_FUNC(void) PyFloat_AsReprString(char*, PyFloatObject *v); - -/* Write str(v) into the char buffer argument, followed by null byte. The - buffer must be "big enough"; >= 100 is very safe. Note that it's - unusual to be able to get back the float you started with from - PyFloat_AsString's result -- use PyFloat_AsReprString() if you want to - preserve precision across conversions. */ -PyAPI_FUNC(void) PyFloat_AsString(char*, PyFloatObject *v); - -/* _PyFloat_{Pack,Unpack}{4,8} - * - * The struct and pickle (at least) modules need an efficient platform- - * independent way to store floating-point values as byte strings. - * The Pack routines produce a string from a C double, and the Unpack - * routines produce a C double from such a string. The suffix (4 or 8) - * specifies the number of bytes in the string. - * - * On platforms that appear to use (see _PyFloat_Init()) IEEE-754 formats - * these functions work by copying bits. On other platforms, the formats the - * 4- byte format is identical to the IEEE-754 single precision format, and - * the 8-byte format to the IEEE-754 double precision format, although the - * packing of INFs and NaNs (if such things exist on the platform) isn't - * handled correctly, and attempting to unpack a string containing an IEEE - * INF or NaN will raise an exception. - * - * On non-IEEE platforms with more precision, or larger dynamic range, than - * 754 supports, not all values can be packed; on non-IEEE platforms with less - * precision, or smaller dynamic range, not all values can be unpacked. What - * happens in such cases is partly accidental (alas). - */ - -/* The pack routines write 4 or 8 bytes, starting at p. le is a bool - * argument, true if you want the string in little-endian format (exponent - * last, at p+3 or p+7), false if you want big-endian format (exponent - * first, at p). - * Return value: 0 if all is OK, -1 if error (and an exception is - * set, most likely OverflowError). - * There are two problems on non-IEEE platforms: - * 1): What this does is undefined if x is a NaN or infinity. - * 2): -0.0 and +0.0 produce the same string. - */ -PyAPI_FUNC(int) _PyFloat_Pack4(double x, unsigned char *p, int le); -PyAPI_FUNC(int) _PyFloat_Pack8(double x, unsigned char *p, int le); - -/* Used to get the important decimal digits of a double */ -PyAPI_FUNC(int) _PyFloat_Digits(char *buf, double v, int *signum); -PyAPI_FUNC(void) _PyFloat_DigitsInit(void); - -/* The unpack routines read 4 or 8 bytes, starting at p. le is a bool - * argument, true if the string is in little-endian format (exponent - * last, at p+3 or p+7), false if big-endian (exponent first, at p). - * Return value: The unpacked double. On error, this is -1.0 and - * PyErr_Occurred() is true (and an exception is set, most likely - * OverflowError). Note that on a non-IEEE platform this will refuse - * to unpack a string that represents a NaN or infinity. - */ -PyAPI_FUNC(double) _PyFloat_Unpack4(const unsigned char *p, int le); -PyAPI_FUNC(double) _PyFloat_Unpack8(const unsigned char *p, int le); - -/* free list api */ -PyAPI_FUNC(int) PyFloat_ClearFreeList(void); - -/* Format the object based on the format_spec, as defined in PEP 3101 - (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyFloat_FormatAdvanced(PyObject *obj, - char *format_spec, - Py_ssize_t format_spec_len); - -/* Round a C double x to the closest multiple of 10**-ndigits. Returns a - Python float on success, or NULL (with an appropriate exception set) on - failure. Used in builtin_round in bltinmodule.c. */ -PyAPI_FUNC(PyObject *) _Py_double_round(double x, int ndigits); - - - -#ifdef __cplusplus -} -#endif -#endif /* !Py_FLOATOBJECT_H */
--- a/test/include/python2.7/frameobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,89 +0,0 @@ - -/* Frame object interface */ - -#ifndef Py_FRAMEOBJECT_H -#define Py_FRAMEOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - int b_type; /* what kind of block this is */ - int b_handler; /* where to jump to find handler */ - int b_level; /* value stack level to pop to */ -} PyTryBlock; - -typedef struct _frame { - PyObject_VAR_HEAD - struct _frame *f_back; /* previous frame, or NULL */ - PyCodeObject *f_code; /* code segment */ - PyObject *f_builtins; /* builtin symbol table (PyDictObject) */ - PyObject *f_globals; /* global symbol table (PyDictObject) */ - PyObject *f_locals; /* local symbol table (any mapping) */ - PyObject **f_valuestack; /* points after the last local */ - /* Next free slot in f_valuestack. Frame creation sets to f_valuestack. - Frame evaluation usually NULLs it, but a frame that yields sets it - to the current stack top. */ - PyObject **f_stacktop; - PyObject *f_trace; /* Trace function */ - - /* If an exception is raised in this frame, the next three are used to - * record the exception info (if any) originally in the thread state. See - * comments before set_exc_info() -- it's not obvious. - * Invariant: if _type is NULL, then so are _value and _traceback. - * Desired invariant: all three are NULL, or all three are non-NULL. That - * one isn't currently true, but "should be". - */ - PyObject *f_exc_type, *f_exc_value, *f_exc_traceback; - - PyThreadState *f_tstate; - int f_lasti; /* Last instruction if called */ - /* Call PyFrame_GetLineNumber() instead of reading this field - directly. As of 2.3 f_lineno is only valid when tracing is - active (i.e. when f_trace is set). At other times we use - PyCode_Addr2Line to calculate the line from the current - bytecode index. */ - int f_lineno; /* Current line number */ - int f_iblock; /* index in f_blockstack */ - PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */ - PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */ -} PyFrameObject; - - -/* Standard object interface */ - -PyAPI_DATA(PyTypeObject) PyFrame_Type; - -#define PyFrame_Check(op) ((op)->ob_type == &PyFrame_Type) -#define PyFrame_IsRestricted(f) \ - ((f)->f_builtins != (f)->f_tstate->interp->builtins) - -PyAPI_FUNC(PyFrameObject *) PyFrame_New(PyThreadState *, PyCodeObject *, - PyObject *, PyObject *); - - -/* The rest of the interface is specific for frame objects */ - -/* Block management functions */ - -PyAPI_FUNC(void) PyFrame_BlockSetup(PyFrameObject *, int, int, int); -PyAPI_FUNC(PyTryBlock *) PyFrame_BlockPop(PyFrameObject *); - -/* Extend the value stack */ - -PyAPI_FUNC(PyObject **) PyFrame_ExtendStack(PyFrameObject *, int, int); - -/* Conversions between "fast locals" and locals in dictionary */ - -PyAPI_FUNC(void) PyFrame_LocalsToFast(PyFrameObject *, int); -PyAPI_FUNC(void) PyFrame_FastToLocals(PyFrameObject *); - -PyAPI_FUNC(int) PyFrame_ClearFreeList(void); - -/* Return the line of code the frame is currently executing. */ -PyAPI_FUNC(int) PyFrame_GetLineNumber(PyFrameObject *); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_FRAMEOBJECT_H */
--- a/test/include/python2.7/funcobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,76 +0,0 @@ - -/* Function object interface */ - -#ifndef Py_FUNCOBJECT_H -#define Py_FUNCOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -/* Function objects and code objects should not be confused with each other: - * - * Function objects are created by the execution of the 'def' statement. - * They reference a code object in their func_code attribute, which is a - * purely syntactic object, i.e. nothing more than a compiled version of some - * source code lines. There is one code object per source code "fragment", - * but each code object can be referenced by zero or many function objects - * depending only on how many times the 'def' statement in the source was - * executed so far. - */ - -typedef struct { - PyObject_HEAD - PyObject *func_code; /* A code object */ - PyObject *func_globals; /* A dictionary (other mappings won't do) */ - PyObject *func_defaults; /* NULL or a tuple */ - PyObject *func_closure; /* NULL or a tuple of cell objects */ - PyObject *func_doc; /* The __doc__ attribute, can be anything */ - PyObject *func_name; /* The __name__ attribute, a string object */ - PyObject *func_dict; /* The __dict__ attribute, a dict or NULL */ - PyObject *func_weakreflist; /* List of weak references */ - PyObject *func_module; /* The __module__ attribute, can be anything */ - - /* Invariant: - * func_closure contains the bindings for func_code->co_freevars, so - * PyTuple_Size(func_closure) == PyCode_GetNumFree(func_code) - * (func_closure may be NULL if PyCode_GetNumFree(func_code) == 0). - */ -} PyFunctionObject; - -PyAPI_DATA(PyTypeObject) PyFunction_Type; - -#define PyFunction_Check(op) (Py_TYPE(op) == &PyFunction_Type) - -PyAPI_FUNC(PyObject *) PyFunction_New(PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) PyFunction_GetCode(PyObject *); -PyAPI_FUNC(PyObject *) PyFunction_GetGlobals(PyObject *); -PyAPI_FUNC(PyObject *) PyFunction_GetModule(PyObject *); -PyAPI_FUNC(PyObject *) PyFunction_GetDefaults(PyObject *); -PyAPI_FUNC(int) PyFunction_SetDefaults(PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) PyFunction_GetClosure(PyObject *); -PyAPI_FUNC(int) PyFunction_SetClosure(PyObject *, PyObject *); - -/* Macros for direct access to these values. Type checks are *not* - done, so use with care. */ -#define PyFunction_GET_CODE(func) \ - (((PyFunctionObject *)func) -> func_code) -#define PyFunction_GET_GLOBALS(func) \ - (((PyFunctionObject *)func) -> func_globals) -#define PyFunction_GET_MODULE(func) \ - (((PyFunctionObject *)func) -> func_module) -#define PyFunction_GET_DEFAULTS(func) \ - (((PyFunctionObject *)func) -> func_defaults) -#define PyFunction_GET_CLOSURE(func) \ - (((PyFunctionObject *)func) -> func_closure) - -/* The classmethod and staticmethod types lives here, too */ -PyAPI_DATA(PyTypeObject) PyClassMethod_Type; -PyAPI_DATA(PyTypeObject) PyStaticMethod_Type; - -PyAPI_FUNC(PyObject *) PyClassMethod_New(PyObject *); -PyAPI_FUNC(PyObject *) PyStaticMethod_New(PyObject *); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_FUNCOBJECT_H */
--- a/test/include/python2.7/genobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ - -/* Generator object interface */ - -#ifndef Py_GENOBJECT_H -#define Py_GENOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -struct _frame; /* Avoid including frameobject.h */ - -typedef struct { - PyObject_HEAD - /* The gi_ prefix is intended to remind of generator-iterator. */ - - /* Note: gi_frame can be NULL if the generator is "finished" */ - struct _frame *gi_frame; - - /* True if generator is being executed. */ - int gi_running; - - /* The code object backing the generator */ - PyObject *gi_code; - - /* List of weak reference. */ - PyObject *gi_weakreflist; -} PyGenObject; - -PyAPI_DATA(PyTypeObject) PyGen_Type; - -#define PyGen_Check(op) PyObject_TypeCheck(op, &PyGen_Type) -#define PyGen_CheckExact(op) (Py_TYPE(op) == &PyGen_Type) - -PyAPI_FUNC(PyObject *) PyGen_New(struct _frame *); -PyAPI_FUNC(int) PyGen_NeedsFinalizing(PyGenObject *); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_GENOBJECT_H */
--- a/test/include/python2.7/graminit.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,87 +0,0 @@ -/* Generated by Parser/pgen */ - -#define single_input 256 -#define file_input 257 -#define eval_input 258 -#define decorator 259 -#define decorators 260 -#define decorated 261 -#define funcdef 262 -#define parameters 263 -#define varargslist 264 -#define fpdef 265 -#define fplist 266 -#define stmt 267 -#define simple_stmt 268 -#define small_stmt 269 -#define expr_stmt 270 -#define augassign 271 -#define print_stmt 272 -#define del_stmt 273 -#define pass_stmt 274 -#define flow_stmt 275 -#define break_stmt 276 -#define continue_stmt 277 -#define return_stmt 278 -#define yield_stmt 279 -#define raise_stmt 280 -#define import_stmt 281 -#define import_name 282 -#define import_from 283 -#define import_as_name 284 -#define dotted_as_name 285 -#define import_as_names 286 -#define dotted_as_names 287 -#define dotted_name 288 -#define global_stmt 289 -#define exec_stmt 290 -#define assert_stmt 291 -#define compound_stmt 292 -#define if_stmt 293 -#define while_stmt 294 -#define for_stmt 295 -#define try_stmt 296 -#define with_stmt 297 -#define with_item 298 -#define except_clause 299 -#define suite 300 -#define testlist_safe 301 -#define old_test 302 -#define old_lambdef 303 -#define test 304 -#define or_test 305 -#define and_test 306 -#define not_test 307 -#define comparison 308 -#define comp_op 309 -#define expr 310 -#define xor_expr 311 -#define and_expr 312 -#define shift_expr 313 -#define arith_expr 314 -#define term 315 -#define factor 316 -#define power 317 -#define atom 318 -#define listmaker 319 -#define testlist_comp 320 -#define lambdef 321 -#define trailer 322 -#define subscriptlist 323 -#define subscript 324 -#define sliceop 325 -#define exprlist 326 -#define testlist 327 -#define dictorsetmaker 328 -#define classdef 329 -#define arglist 330 -#define argument 331 -#define list_iter 332 -#define list_for 333 -#define list_if 334 -#define comp_iter 335 -#define comp_for 336 -#define comp_if 337 -#define testlist1 338 -#define encoding_decl 339 -#define yield_expr 340
--- a/test/include/python2.7/grammar.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,93 +0,0 @@ - -/* Grammar interface */ - -#ifndef Py_GRAMMAR_H -#define Py_GRAMMAR_H -#ifdef __cplusplus -extern "C" { -#endif - -#include "bitset.h" /* Sigh... */ - -/* A label of an arc */ - -typedef struct { - int lb_type; - char *lb_str; -} label; - -#define EMPTY 0 /* Label number 0 is by definition the empty label */ - -/* A list of labels */ - -typedef struct { - int ll_nlabels; - label *ll_label; -} labellist; - -/* An arc from one state to another */ - -typedef struct { - short a_lbl; /* Label of this arc */ - short a_arrow; /* State where this arc goes to */ -} arc; - -/* A state in a DFA */ - -typedef struct { - int s_narcs; - arc *s_arc; /* Array of arcs */ - - /* Optional accelerators */ - int s_lower; /* Lowest label index */ - int s_upper; /* Highest label index */ - int *s_accel; /* Accelerator */ - int s_accept; /* Nonzero for accepting state */ -} state; - -/* A DFA */ - -typedef struct { - int d_type; /* Non-terminal this represents */ - char *d_name; /* For printing */ - int d_initial; /* Initial state */ - int d_nstates; - state *d_state; /* Array of states */ - bitset d_first; -} dfa; - -/* A grammar */ - -typedef struct { - int g_ndfas; - dfa *g_dfa; /* Array of DFAs */ - labellist g_ll; - int g_start; /* Start symbol of the grammar */ - int g_accel; /* Set if accelerators present */ -} grammar; - -/* FUNCTIONS */ - -grammar *newgrammar(int start); -dfa *adddfa(grammar *g, int type, char *name); -int addstate(dfa *d); -void addarc(dfa *d, int from, int to, int lbl); -dfa *PyGrammar_FindDFA(grammar *g, int type); - -int addlabel(labellist *ll, int type, char *str); -int findlabel(labellist *ll, int type, char *str); -char *PyGrammar_LabelRepr(label *lb); -void translatelabels(grammar *g); - -void addfirstsets(grammar *g); - -void PyGrammar_AddAccelerators(grammar *g); -void PyGrammar_RemoveAccelerators(grammar *); - -void printgrammar(grammar *g, FILE *fp); -void printnonterminals(grammar *g, FILE *fp); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_GRAMMAR_H */
--- a/test/include/python2.7/import.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,71 +0,0 @@ - -/* Module definition and import interface */ - -#ifndef Py_IMPORT_H -#define Py_IMPORT_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_FUNC(long) PyImport_GetMagicNumber(void); -PyAPI_FUNC(PyObject *) PyImport_ExecCodeModule(char *name, PyObject *co); -PyAPI_FUNC(PyObject *) PyImport_ExecCodeModuleEx( - char *name, PyObject *co, char *pathname); -PyAPI_FUNC(PyObject *) PyImport_GetModuleDict(void); -PyAPI_FUNC(PyObject *) PyImport_AddModule(const char *name); -PyAPI_FUNC(PyObject *) PyImport_ImportModule(const char *name); -PyAPI_FUNC(PyObject *) PyImport_ImportModuleNoBlock(const char *); -PyAPI_FUNC(PyObject *) PyImport_ImportModuleLevel(char *name, - PyObject *globals, PyObject *locals, PyObject *fromlist, int level); - -#define PyImport_ImportModuleEx(n, g, l, f) \ - PyImport_ImportModuleLevel(n, g, l, f, -1) - -PyAPI_FUNC(PyObject *) PyImport_GetImporter(PyObject *path); -PyAPI_FUNC(PyObject *) PyImport_Import(PyObject *name); -PyAPI_FUNC(PyObject *) PyImport_ReloadModule(PyObject *m); -PyAPI_FUNC(void) PyImport_Cleanup(void); -PyAPI_FUNC(int) PyImport_ImportFrozenModule(char *); - -#ifdef WITH_THREAD -PyAPI_FUNC(void) _PyImport_AcquireLock(void); -PyAPI_FUNC(int) _PyImport_ReleaseLock(void); -#else -#define _PyImport_AcquireLock() -#define _PyImport_ReleaseLock() 1 -#endif - -PyAPI_FUNC(struct filedescr *) _PyImport_FindModule( - const char *, PyObject *, char *, size_t, FILE **, PyObject **); -PyAPI_FUNC(int) _PyImport_IsScript(struct filedescr *); -PyAPI_FUNC(void) _PyImport_ReInitLock(void); - -PyAPI_FUNC(PyObject *) _PyImport_FindExtension(char *, char *); -PyAPI_FUNC(PyObject *) _PyImport_FixupExtension(char *, char *); - -struct _inittab { - char *name; - void (*initfunc)(void); -}; - -PyAPI_DATA(PyTypeObject) PyNullImporter_Type; -PyAPI_DATA(struct _inittab *) PyImport_Inittab; - -PyAPI_FUNC(int) PyImport_AppendInittab(const char *name, void (*initfunc)(void)); -PyAPI_FUNC(int) PyImport_ExtendInittab(struct _inittab *newtab); - -struct _frozen { - char *name; - unsigned char *code; - int size; -}; - -/* Embedding apps may change this pointer to point to their favorite - collection of frozen modules: */ - -PyAPI_DATA(struct _frozen *) PyImport_FrozenModules; - -#ifdef __cplusplus -} -#endif -#endif /* !Py_IMPORT_H */
--- a/test/include/python2.7/intobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,81 +0,0 @@ - -/* Integer object interface */ - -/* -PyIntObject represents a (long) integer. This is an immutable object; -an integer cannot change its value after creation. - -There are functions to create new integer objects, to test an object -for integer-ness, and to get the integer value. The latter functions -returns -1 and sets errno to EBADF if the object is not an PyIntObject. -None of the functions should be applied to nil objects. - -The type PyIntObject is (unfortunately) exposed here so we can declare -_Py_TrueStruct and _Py_ZeroStruct in boolobject.h; don't use this. -*/ - -#ifndef Py_INTOBJECT_H -#define Py_INTOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - PyObject_HEAD - long ob_ival; -} PyIntObject; - -PyAPI_DATA(PyTypeObject) PyInt_Type; - -#define PyInt_Check(op) \ - PyType_FastSubclass((op)->ob_type, Py_TPFLAGS_INT_SUBCLASS) -#define PyInt_CheckExact(op) ((op)->ob_type == &PyInt_Type) - -PyAPI_FUNC(PyObject *) PyInt_FromString(char*, char**, int); -#ifdef Py_USING_UNICODE -PyAPI_FUNC(PyObject *) PyInt_FromUnicode(Py_UNICODE*, Py_ssize_t, int); -#endif -PyAPI_FUNC(PyObject *) PyInt_FromLong(long); -PyAPI_FUNC(PyObject *) PyInt_FromSize_t(size_t); -PyAPI_FUNC(PyObject *) PyInt_FromSsize_t(Py_ssize_t); -PyAPI_FUNC(long) PyInt_AsLong(PyObject *); -PyAPI_FUNC(Py_ssize_t) PyInt_AsSsize_t(PyObject *); -PyAPI_FUNC(int) _PyInt_AsInt(PyObject *); -PyAPI_FUNC(unsigned long) PyInt_AsUnsignedLongMask(PyObject *); -#ifdef HAVE_LONG_LONG -PyAPI_FUNC(unsigned PY_LONG_LONG) PyInt_AsUnsignedLongLongMask(PyObject *); -#endif - -PyAPI_FUNC(long) PyInt_GetMax(void); - -/* Macro, trading safety for speed */ -#define PyInt_AS_LONG(op) (((PyIntObject *)(op))->ob_ival) - -/* These aren't really part of the Int object, but they're handy; the protos - * are necessary for systems that need the magic of PyAPI_FUNC and that want - * to have stropmodule as a dynamically loaded module instead of building it - * into the main Python shared library/DLL. Guido thinks I'm weird for - * building it this way. :-) [cjh] - */ -PyAPI_FUNC(unsigned long) PyOS_strtoul(char *, char **, int); -PyAPI_FUNC(long) PyOS_strtol(char *, char **, int); - -/* free list api */ -PyAPI_FUNC(int) PyInt_ClearFreeList(void); - -/* Convert an integer to the given base. Returns a string. - If base is 2, 8 or 16, add the proper prefix '0b', '0o' or '0x'. - If newstyle is zero, then use the pre-2.6 behavior of octal having - a leading "0" */ -PyAPI_FUNC(PyObject*) _PyInt_Format(PyIntObject* v, int base, int newstyle); - -/* Format the object based on the format_spec, as defined in PEP 3101 - (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyInt_FormatAdvanced(PyObject *obj, - char *format_spec, - Py_ssize_t format_spec_len); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_INTOBJECT_H */
--- a/test/include/python2.7/intrcheck.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ - -#ifndef Py_INTRCHECK_H -#define Py_INTRCHECK_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_FUNC(int) PyOS_InterruptOccurred(void); -PyAPI_FUNC(void) PyOS_InitInterrupts(void); -PyAPI_FUNC(void) PyOS_AfterFork(void); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_INTRCHECK_H */
--- a/test/include/python2.7/iterobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -#ifndef Py_ITEROBJECT_H -#define Py_ITEROBJECT_H -/* Iterators (the basic kind, over a sequence) */ -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_DATA(PyTypeObject) PySeqIter_Type; - -#define PySeqIter_Check(op) (Py_TYPE(op) == &PySeqIter_Type) - -PyAPI_FUNC(PyObject *) PySeqIter_New(PyObject *); - -PyAPI_DATA(PyTypeObject) PyCallIter_Type; - -#define PyCallIter_Check(op) (Py_TYPE(op) == &PyCallIter_Type) - -PyAPI_FUNC(PyObject *) PyCallIter_New(PyObject *, PyObject *); -#ifdef __cplusplus -} -#endif -#endif /* !Py_ITEROBJECT_H */ -
--- a/test/include/python2.7/listobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,68 +0,0 @@ - -/* List object interface */ - -/* -Another generally useful object type is a list of object pointers. -This is a mutable type: the list items can be changed, and items can be -added or removed. Out-of-range indices or non-list objects are ignored. - -*** WARNING *** PyList_SetItem does not increment the new item's reference -count, but does decrement the reference count of the item it replaces, -if not nil. It does *decrement* the reference count if it is *not* -inserted in the list. Similarly, PyList_GetItem does not increment the -returned item's reference count. -*/ - -#ifndef Py_LISTOBJECT_H -#define Py_LISTOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - PyObject_VAR_HEAD - /* Vector of pointers to list elements. list[0] is ob_item[0], etc. */ - PyObject **ob_item; - - /* ob_item contains space for 'allocated' elements. The number - * currently in use is ob_size. - * Invariants: - * 0 <= ob_size <= allocated - * len(list) == ob_size - * ob_item == NULL implies ob_size == allocated == 0 - * list.sort() temporarily sets allocated to -1 to detect mutations. - * - * Items must normally not be NULL, except during construction when - * the list is not yet visible outside the function that builds it. - */ - Py_ssize_t allocated; -} PyListObject; - -PyAPI_DATA(PyTypeObject) PyList_Type; - -#define PyList_Check(op) \ - PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_LIST_SUBCLASS) -#define PyList_CheckExact(op) (Py_TYPE(op) == &PyList_Type) - -PyAPI_FUNC(PyObject *) PyList_New(Py_ssize_t size); -PyAPI_FUNC(Py_ssize_t) PyList_Size(PyObject *); -PyAPI_FUNC(PyObject *) PyList_GetItem(PyObject *, Py_ssize_t); -PyAPI_FUNC(int) PyList_SetItem(PyObject *, Py_ssize_t, PyObject *); -PyAPI_FUNC(int) PyList_Insert(PyObject *, Py_ssize_t, PyObject *); -PyAPI_FUNC(int) PyList_Append(PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) PyList_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t); -PyAPI_FUNC(int) PyList_SetSlice(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *); -PyAPI_FUNC(int) PyList_Sort(PyObject *); -PyAPI_FUNC(int) PyList_Reverse(PyObject *); -PyAPI_FUNC(PyObject *) PyList_AsTuple(PyObject *); -PyAPI_FUNC(PyObject *) _PyList_Extend(PyListObject *, PyObject *); - -/* Macro, trading safety for speed */ -#define PyList_GET_ITEM(op, i) (((PyListObject *)(op))->ob_item[i]) -#define PyList_SET_ITEM(op, i, v) (((PyListObject *)(op))->ob_item[i] = (v)) -#define PyList_GET_SIZE(op) Py_SIZE(op) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_LISTOBJECT_H */
--- a/test/include/python2.7/longintrepr.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -#ifndef Py_LONGINTREPR_H -#define Py_LONGINTREPR_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* This is published for the benefit of "friend" marshal.c only. */ - -/* Parameters of the long integer representation. There are two different - sets of parameters: one set for 30-bit digits, stored in an unsigned 32-bit - integer type, and one set for 15-bit digits with each digit stored in an - unsigned short. The value of PYLONG_BITS_IN_DIGIT, defined either at - configure time or in pyport.h, is used to decide which digit size to use. - - Type 'digit' should be able to hold 2*PyLong_BASE-1, and type 'twodigits' - should be an unsigned integer type able to hold all integers up to - PyLong_BASE*PyLong_BASE-1. x_sub assumes that 'digit' is an unsigned type, - and that overflow is handled by taking the result modulo 2**N for some N > - PyLong_SHIFT. The majority of the code doesn't care about the precise - value of PyLong_SHIFT, but there are some notable exceptions: - - - long_pow() requires that PyLong_SHIFT be divisible by 5 - - - PyLong_{As,From}ByteArray require that PyLong_SHIFT be at least 8 - - - long_hash() requires that PyLong_SHIFT is *strictly* less than the number - of bits in an unsigned long, as do the PyLong <-> long (or unsigned long) - conversion functions - - - the long <-> size_t/Py_ssize_t conversion functions expect that - PyLong_SHIFT is strictly less than the number of bits in a size_t - - - the marshal code currently expects that PyLong_SHIFT is a multiple of 15 - - The values 15 and 30 should fit all of the above requirements, on any - platform. -*/ - -#if PYLONG_BITS_IN_DIGIT == 30 -#if !(defined HAVE_UINT64_T && defined HAVE_UINT32_T && \ - defined HAVE_INT64_T && defined HAVE_INT32_T) -#error "30-bit long digits requested, but the necessary types are not available on this platform" -#endif -typedef PY_UINT32_T digit; -typedef PY_INT32_T sdigit; /* signed variant of digit */ -typedef PY_UINT64_T twodigits; -typedef PY_INT64_T stwodigits; /* signed variant of twodigits */ -#define PyLong_SHIFT 30 -#define _PyLong_DECIMAL_SHIFT 9 /* max(e such that 10**e fits in a digit) */ -#define _PyLong_DECIMAL_BASE ((digit)1000000000) /* 10 ** DECIMAL_SHIFT */ -#elif PYLONG_BITS_IN_DIGIT == 15 -typedef unsigned short digit; -typedef short sdigit; /* signed variant of digit */ -typedef unsigned long twodigits; -typedef long stwodigits; /* signed variant of twodigits */ -#define PyLong_SHIFT 15 -#define _PyLong_DECIMAL_SHIFT 4 /* max(e such that 10**e fits in a digit) */ -#define _PyLong_DECIMAL_BASE ((digit)10000) /* 10 ** DECIMAL_SHIFT */ -#else -#error "PYLONG_BITS_IN_DIGIT should be 15 or 30" -#endif -#define PyLong_BASE ((digit)1 << PyLong_SHIFT) -#define PyLong_MASK ((digit)(PyLong_BASE - 1)) - -/* b/w compatibility with Python 2.5 */ -#define SHIFT PyLong_SHIFT -#define BASE PyLong_BASE -#define MASK PyLong_MASK - -#if PyLong_SHIFT % 5 != 0 -#error "longobject.c requires that PyLong_SHIFT be divisible by 5" -#endif - -/* Long integer representation. - The absolute value of a number is equal to - SUM(for i=0 through abs(ob_size)-1) ob_digit[i] * 2**(SHIFT*i) - Negative numbers are represented with ob_size < 0; - zero is represented by ob_size == 0. - In a normalized number, ob_digit[abs(ob_size)-1] (the most significant - digit) is never zero. Also, in all cases, for all valid i, - 0 <= ob_digit[i] <= MASK. - The allocation function takes care of allocating extra memory - so that ob_digit[0] ... ob_digit[abs(ob_size)-1] are actually available. - - CAUTION: Generic code manipulating subtypes of PyVarObject has to - aware that longs abuse ob_size's sign bit. -*/ - -struct _longobject { - PyObject_VAR_HEAD - digit ob_digit[1]; -}; - -PyAPI_FUNC(PyLongObject *) _PyLong_New(Py_ssize_t); - -/* Return a copy of src. */ -PyAPI_FUNC(PyObject *) _PyLong_Copy(PyLongObject *src); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_LONGINTREPR_H */
--- a/test/include/python2.7/longobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,135 +0,0 @@ -#ifndef Py_LONGOBJECT_H -#define Py_LONGOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* Long (arbitrary precision) integer object interface */ - -typedef struct _longobject PyLongObject; /* Revealed in longintrepr.h */ - -PyAPI_DATA(PyTypeObject) PyLong_Type; - -#define PyLong_Check(op) \ - PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_LONG_SUBCLASS) -#define PyLong_CheckExact(op) (Py_TYPE(op) == &PyLong_Type) - -PyAPI_FUNC(PyObject *) PyLong_FromLong(long); -PyAPI_FUNC(PyObject *) PyLong_FromUnsignedLong(unsigned long); -PyAPI_FUNC(PyObject *) PyLong_FromDouble(double); -PyAPI_FUNC(PyObject *) PyLong_FromSize_t(size_t); -PyAPI_FUNC(PyObject *) PyLong_FromSsize_t(Py_ssize_t); -PyAPI_FUNC(long) PyLong_AsLong(PyObject *); -PyAPI_FUNC(long) PyLong_AsLongAndOverflow(PyObject *, int *); -PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLong(PyObject *); -PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLongMask(PyObject *); -PyAPI_FUNC(Py_ssize_t) PyLong_AsSsize_t(PyObject *); -PyAPI_FUNC(int) _PyLong_AsInt(PyObject *); -PyAPI_FUNC(PyObject *) PyLong_GetInfo(void); - -/* For use by intobject.c only */ -#define _PyLong_AsSsize_t PyLong_AsSsize_t -#define _PyLong_FromSize_t PyLong_FromSize_t -#define _PyLong_FromSsize_t PyLong_FromSsize_t -PyAPI_DATA(int) _PyLong_DigitValue[256]; - -/* _PyLong_Frexp returns a double x and an exponent e such that the - true value is approximately equal to x * 2**e. e is >= 0. x is - 0.0 if and only if the input is 0 (in which case, e and x are both - zeroes); otherwise, 0.5 <= abs(x) < 1.0. On overflow, which is - possible if the number of bits doesn't fit into a Py_ssize_t, sets - OverflowError and returns -1.0 for x, 0 for e. */ -PyAPI_FUNC(double) _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e); - -PyAPI_FUNC(double) PyLong_AsDouble(PyObject *); -PyAPI_FUNC(PyObject *) PyLong_FromVoidPtr(void *); -PyAPI_FUNC(void *) PyLong_AsVoidPtr(PyObject *); - -#ifdef HAVE_LONG_LONG -PyAPI_FUNC(PyObject *) PyLong_FromLongLong(PY_LONG_LONG); -PyAPI_FUNC(PyObject *) PyLong_FromUnsignedLongLong(unsigned PY_LONG_LONG); -PyAPI_FUNC(PY_LONG_LONG) PyLong_AsLongLong(PyObject *); -PyAPI_FUNC(unsigned PY_LONG_LONG) PyLong_AsUnsignedLongLong(PyObject *); -PyAPI_FUNC(unsigned PY_LONG_LONG) PyLong_AsUnsignedLongLongMask(PyObject *); -PyAPI_FUNC(PY_LONG_LONG) PyLong_AsLongLongAndOverflow(PyObject *, int *); -#endif /* HAVE_LONG_LONG */ - -PyAPI_FUNC(PyObject *) PyLong_FromString(char *, char **, int); -#ifdef Py_USING_UNICODE -PyAPI_FUNC(PyObject *) PyLong_FromUnicode(Py_UNICODE*, Py_ssize_t, int); -#endif - -/* _PyLong_Sign. Return 0 if v is 0, -1 if v < 0, +1 if v > 0. - v must not be NULL, and must be a normalized long. - There are no error cases. -*/ -PyAPI_FUNC(int) _PyLong_Sign(PyObject *v); - - -/* _PyLong_NumBits. Return the number of bits needed to represent the - absolute value of a long. For example, this returns 1 for 1 and -1, 2 - for 2 and -2, and 2 for 3 and -3. It returns 0 for 0. - v must not be NULL, and must be a normalized long. - (size_t)-1 is returned and OverflowError set if the true result doesn't - fit in a size_t. -*/ -PyAPI_FUNC(size_t) _PyLong_NumBits(PyObject *v); - -/* _PyLong_FromByteArray: View the n unsigned bytes as a binary integer in - base 256, and return a Python long with the same numeric value. - If n is 0, the integer is 0. Else: - If little_endian is 1/true, bytes[n-1] is the MSB and bytes[0] the LSB; - else (little_endian is 0/false) bytes[0] is the MSB and bytes[n-1] the - LSB. - If is_signed is 0/false, view the bytes as a non-negative integer. - If is_signed is 1/true, view the bytes as a 2's-complement integer, - non-negative if bit 0x80 of the MSB is clear, negative if set. - Error returns: - + Return NULL with the appropriate exception set if there's not - enough memory to create the Python long. -*/ -PyAPI_FUNC(PyObject *) _PyLong_FromByteArray( - const unsigned char* bytes, size_t n, - int little_endian, int is_signed); - -/* _PyLong_AsByteArray: Convert the least-significant 8*n bits of long - v to a base-256 integer, stored in array bytes. Normally return 0, - return -1 on error. - If little_endian is 1/true, store the MSB at bytes[n-1] and the LSB at - bytes[0]; else (little_endian is 0/false) store the MSB at bytes[0] and - the LSB at bytes[n-1]. - If is_signed is 0/false, it's an error if v < 0; else (v >= 0) n bytes - are filled and there's nothing special about bit 0x80 of the MSB. - If is_signed is 1/true, bytes is filled with the 2's-complement - representation of v's value. Bit 0x80 of the MSB is the sign bit. - Error returns (-1): - + is_signed is 0 and v < 0. TypeError is set in this case, and bytes - isn't altered. - + n isn't big enough to hold the full mathematical value of v. For - example, if is_signed is 0 and there are more digits in the v than - fit in n; or if is_signed is 1, v < 0, and n is just 1 bit shy of - being large enough to hold a sign bit. OverflowError is set in this - case, but bytes holds the least-signficant n bytes of the true value. -*/ -PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v, - unsigned char* bytes, size_t n, - int little_endian, int is_signed); - -/* _PyLong_Format: Convert the long to a string object with given base, - appending a base prefix of 0[box] if base is 2, 8 or 16. - Add a trailing "L" if addL is non-zero. - If newstyle is zero, then use the pre-2.6 behavior of octal having - a leading "0", instead of the prefix "0o" */ -PyAPI_FUNC(PyObject *) _PyLong_Format(PyObject *aa, int base, int addL, int newstyle); - -/* Format the object based on the format_spec, as defined in PEP 3101 - (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyLong_FormatAdvanced(PyObject *obj, - char *format_spec, - Py_ssize_t format_spec_len); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_LONGOBJECT_H */
--- a/test/include/python2.7/marshal.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,25 +0,0 @@ - -/* Interface for marshal.c */ - -#ifndef Py_MARSHAL_H -#define Py_MARSHAL_H -#ifdef __cplusplus -extern "C" { -#endif - -#define Py_MARSHAL_VERSION 2 - -PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *, int); -PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *, int); -PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *, int); - -PyAPI_FUNC(long) PyMarshal_ReadLongFromFile(FILE *); -PyAPI_FUNC(int) PyMarshal_ReadShortFromFile(FILE *); -PyAPI_FUNC(PyObject *) PyMarshal_ReadObjectFromFile(FILE *); -PyAPI_FUNC(PyObject *) PyMarshal_ReadLastObjectFromFile(FILE *); -PyAPI_FUNC(PyObject *) PyMarshal_ReadObjectFromString(char *, Py_ssize_t); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_MARSHAL_H */
--- a/test/include/python2.7/memoryobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,74 +0,0 @@ -/* Memory view object. In Python this is available as "memoryview". */ - -#ifndef Py_MEMORYOBJECT_H -#define Py_MEMORYOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_DATA(PyTypeObject) PyMemoryView_Type; - -#define PyMemoryView_Check(op) (Py_TYPE(op) == &PyMemoryView_Type) - -/* Get a pointer to the underlying Py_buffer of a memoryview object. */ -#define PyMemoryView_GET_BUFFER(op) (&((PyMemoryViewObject *)(op))->view) -/* Get a pointer to the PyObject from which originates a memoryview object. */ -#define PyMemoryView_GET_BASE(op) (((PyMemoryViewObject *)(op))->view.obj) - - -PyAPI_FUNC(PyObject *) PyMemoryView_GetContiguous(PyObject *base, - int buffertype, - char fort); - - /* Return a contiguous chunk of memory representing the buffer - from an object in a memory view object. If a copy is made then the - base object for the memory view will be a *new* bytes object. - - Otherwise, the base-object will be the object itself and no - data-copying will be done. - - The buffertype argument can be PyBUF_READ, PyBUF_WRITE, - PyBUF_SHADOW to determine whether the returned buffer - should be READONLY, WRITABLE, or set to update the - original buffer if a copy must be made. If buffertype is - PyBUF_WRITE and the buffer is not contiguous an error will - be raised. In this circumstance, the user can use - PyBUF_SHADOW to ensure that a writable temporary - contiguous buffer is returned. The contents of this - contiguous buffer will be copied back into the original - object after the memoryview object is deleted as long as - the original object is writable and allows setting an - exclusive write lock. If this is not allowed by the - original object, then a BufferError is raised. - - If the object is multi-dimensional and if fortran is 'F', - the first dimension of the underlying array will vary the - fastest in the buffer. If fortran is 'C', then the last - dimension will vary the fastest (C-style contiguous). If - fortran is 'A', then it does not matter and you will get - whatever the object decides is more efficient. - - A new reference is returned that must be DECREF'd when finished. - */ - -PyAPI_FUNC(PyObject *) PyMemoryView_FromObject(PyObject *base); - -PyAPI_FUNC(PyObject *) PyMemoryView_FromBuffer(Py_buffer *info); - /* create new if bufptr is NULL - will be a new bytesobject in base */ - - -/* The struct is declared here so that macros can work, but it shouldn't - be considered public. Don't access those fields directly, use the macros - and functions instead! */ -typedef struct { - PyObject_HEAD - PyObject *base; - Py_buffer view; -} PyMemoryViewObject; - - -#ifdef __cplusplus -} -#endif -#endif /* !Py_MEMORYOBJECT_H */
--- a/test/include/python2.7/metagrammar.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -#ifndef Py_METAGRAMMAR_H -#define Py_METAGRAMMAR_H -#ifdef __cplusplus -extern "C" { -#endif - - -#define MSTART 256 -#define RULE 257 -#define RHS 258 -#define ALT 259 -#define ITEM 260 -#define ATOM 261 - -#ifdef __cplusplus -} -#endif -#endif /* !Py_METAGRAMMAR_H */
--- a/test/include/python2.7/methodobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,93 +0,0 @@ - -/* Method object interface */ - -#ifndef Py_METHODOBJECT_H -#define Py_METHODOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -/* This is about the type 'builtin_function_or_method', - not Python methods in user-defined classes. See classobject.h - for the latter. */ - -PyAPI_DATA(PyTypeObject) PyCFunction_Type; - -#define PyCFunction_Check(op) (Py_TYPE(op) == &PyCFunction_Type) - -typedef PyObject *(*PyCFunction)(PyObject *, PyObject *); -typedef PyObject *(*PyCFunctionWithKeywords)(PyObject *, PyObject *, - PyObject *); -typedef PyObject *(*PyNoArgsFunction)(PyObject *); - -PyAPI_FUNC(PyCFunction) PyCFunction_GetFunction(PyObject *); -PyAPI_FUNC(PyObject *) PyCFunction_GetSelf(PyObject *); -PyAPI_FUNC(int) PyCFunction_GetFlags(PyObject *); - -/* Macros for direct access to these values. Type checks are *not* - done, so use with care. */ -#define PyCFunction_GET_FUNCTION(func) \ - (((PyCFunctionObject *)func) -> m_ml -> ml_meth) -#define PyCFunction_GET_SELF(func) \ - (((PyCFunctionObject *)func) -> m_self) -#define PyCFunction_GET_FLAGS(func) \ - (((PyCFunctionObject *)func) -> m_ml -> ml_flags) -PyAPI_FUNC(PyObject *) PyCFunction_Call(PyObject *, PyObject *, PyObject *); - -struct PyMethodDef { - const char *ml_name; /* The name of the built-in function/method */ - PyCFunction ml_meth; /* The C function that implements it */ - int ml_flags; /* Combination of METH_xxx flags, which mostly - describe the args expected by the C func */ - const char *ml_doc; /* The __doc__ attribute, or NULL */ -}; -typedef struct PyMethodDef PyMethodDef; - -PyAPI_FUNC(PyObject *) Py_FindMethod(PyMethodDef[], PyObject *, const char *); - -#define PyCFunction_New(ML, SELF) PyCFunction_NewEx((ML), (SELF), NULL) -PyAPI_FUNC(PyObject *) PyCFunction_NewEx(PyMethodDef *, PyObject *, - PyObject *); - -/* Flag passed to newmethodobject */ -#define METH_OLDARGS 0x0000 -#define METH_VARARGS 0x0001 -#define METH_KEYWORDS 0x0002 -/* METH_NOARGS and METH_O must not be combined with the flags above. */ -#define METH_NOARGS 0x0004 -#define METH_O 0x0008 - -/* METH_CLASS and METH_STATIC are a little different; these control - the construction of methods for a class. These cannot be used for - functions in modules. */ -#define METH_CLASS 0x0010 -#define METH_STATIC 0x0020 - -/* METH_COEXIST allows a method to be entered eventhough a slot has - already filled the entry. When defined, the flag allows a separate - method, "__contains__" for example, to coexist with a defined - slot like sq_contains. */ - -#define METH_COEXIST 0x0040 - -typedef struct PyMethodChain { - PyMethodDef *methods; /* Methods of this type */ - struct PyMethodChain *link; /* NULL or base type */ -} PyMethodChain; - -PyAPI_FUNC(PyObject *) Py_FindMethodInChain(PyMethodChain *, PyObject *, - const char *); - -typedef struct { - PyObject_HEAD - PyMethodDef *m_ml; /* Description of the C function to call */ - PyObject *m_self; /* Passed as 'self' arg to the C func, can be NULL */ - PyObject *m_module; /* The __module__ attribute, can be anything */ -} PyCFunctionObject; - -PyAPI_FUNC(int) PyCFunction_ClearFreeList(void); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_METHODOBJECT_H */
--- a/test/include/python2.7/modsupport.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,134 +0,0 @@ - -#ifndef Py_MODSUPPORT_H -#define Py_MODSUPPORT_H -#ifdef __cplusplus -extern "C" { -#endif - -/* Module support interface */ - -#include <stdarg.h> - -/* If PY_SSIZE_T_CLEAN is defined, each functions treats #-specifier - to mean Py_ssize_t */ -#ifdef PY_SSIZE_T_CLEAN -#define PyArg_Parse _PyArg_Parse_SizeT -#define PyArg_ParseTuple _PyArg_ParseTuple_SizeT -#define PyArg_ParseTupleAndKeywords _PyArg_ParseTupleAndKeywords_SizeT -#define PyArg_VaParse _PyArg_VaParse_SizeT -#define PyArg_VaParseTupleAndKeywords _PyArg_VaParseTupleAndKeywords_SizeT -#define Py_BuildValue _Py_BuildValue_SizeT -#define Py_VaBuildValue _Py_VaBuildValue_SizeT -#else -PyAPI_FUNC(PyObject *) _Py_VaBuildValue_SizeT(const char *, va_list); -#endif - -PyAPI_FUNC(int) PyArg_Parse(PyObject *, const char *, ...); -PyAPI_FUNC(int) PyArg_ParseTuple(PyObject *, const char *, ...) Py_FORMAT_PARSETUPLE(PyArg_ParseTuple, 2, 3); -PyAPI_FUNC(int) PyArg_ParseTupleAndKeywords(PyObject *, PyObject *, - const char *, char **, ...); -PyAPI_FUNC(int) PyArg_UnpackTuple(PyObject *, const char *, Py_ssize_t, Py_ssize_t, ...); -PyAPI_FUNC(PyObject *) Py_BuildValue(const char *, ...); -PyAPI_FUNC(PyObject *) _Py_BuildValue_SizeT(const char *, ...); -PyAPI_FUNC(int) _PyArg_NoKeywords(const char *funcname, PyObject *kw); - -PyAPI_FUNC(int) PyArg_VaParse(PyObject *, const char *, va_list); -PyAPI_FUNC(int) PyArg_VaParseTupleAndKeywords(PyObject *, PyObject *, - const char *, char **, va_list); -PyAPI_FUNC(PyObject *) Py_VaBuildValue(const char *, va_list); - -PyAPI_FUNC(int) PyModule_AddObject(PyObject *, const char *, PyObject *); -PyAPI_FUNC(int) PyModule_AddIntConstant(PyObject *, const char *, long); -PyAPI_FUNC(int) PyModule_AddStringConstant(PyObject *, const char *, const char *); -#define PyModule_AddIntMacro(m, c) PyModule_AddIntConstant(m, #c, c) -#define PyModule_AddStringMacro(m, c) PyModule_AddStringConstant(m, #c, c) - -#define PYTHON_API_VERSION 1013 -#define PYTHON_API_STRING "1013" -/* The API version is maintained (independently from the Python version) - so we can detect mismatches between the interpreter and dynamically - loaded modules. These are diagnosed by an error message but - the module is still loaded (because the mismatch can only be tested - after loading the module). The error message is intended to - explain the core dump a few seconds later. - - The symbol PYTHON_API_STRING defines the same value as a string - literal. *** PLEASE MAKE SURE THE DEFINITIONS MATCH. *** - - Please add a line or two to the top of this log for each API - version change: - - 22-Feb-2006 MvL 1013 PEP 353 - long indices for sequence lengths - - 19-Aug-2002 GvR 1012 Changes to string object struct for - interning changes, saving 3 bytes. - - 17-Jul-2001 GvR 1011 Descr-branch, just to be on the safe side - - 25-Jan-2001 FLD 1010 Parameters added to PyCode_New() and - PyFrame_New(); Python 2.1a2 - - 14-Mar-2000 GvR 1009 Unicode API added - - 3-Jan-1999 GvR 1007 Decided to change back! (Don't reuse 1008!) - - 3-Dec-1998 GvR 1008 Python 1.5.2b1 - - 18-Jan-1997 GvR 1007 string interning and other speedups - - 11-Oct-1996 GvR renamed Py_Ellipses to Py_Ellipsis :-( - - 30-Jul-1996 GvR Slice and ellipses syntax added - - 23-Jul-1996 GvR For 1.4 -- better safe than sorry this time :-) - - 7-Nov-1995 GvR Keyword arguments (should've been done at 1.3 :-( ) - - 10-Jan-1995 GvR Renamed globals to new naming scheme - - 9-Jan-1995 GvR Initial version (incompatible with older API) -*/ - -#ifdef MS_WINDOWS -/* Special defines for Windows versions used to live here. Things - have changed, and the "Version" is now in a global string variable. - Reason for this is that this for easier branding of a "custom DLL" - without actually needing a recompile. */ -#endif /* MS_WINDOWS */ - -#if SIZEOF_SIZE_T != SIZEOF_INT -/* On a 64-bit system, rename the Py_InitModule4 so that 2.4 - modules cannot get loaded into a 2.5 interpreter */ -#define Py_InitModule4 Py_InitModule4_64 -#endif - -#ifdef Py_TRACE_REFS - /* When we are tracing reference counts, rename Py_InitModule4 so - modules compiled with incompatible settings will generate a - link-time error. */ - #if SIZEOF_SIZE_T != SIZEOF_INT - #undef Py_InitModule4 - #define Py_InitModule4 Py_InitModule4TraceRefs_64 - #else - #define Py_InitModule4 Py_InitModule4TraceRefs - #endif -#endif - -PyAPI_FUNC(PyObject *) Py_InitModule4(const char *name, PyMethodDef *methods, - const char *doc, PyObject *self, - int apiver); - -#define Py_InitModule(name, methods) \ - Py_InitModule4(name, methods, (char *)NULL, (PyObject *)NULL, \ - PYTHON_API_VERSION) - -#define Py_InitModule3(name, methods, doc) \ - Py_InitModule4(name, methods, doc, (PyObject *)NULL, \ - PYTHON_API_VERSION) - -PyAPI_DATA(char *) _Py_PackageContext; - -#ifdef __cplusplus -} -#endif -#endif /* !Py_MODSUPPORT_H */
--- a/test/include/python2.7/moduleobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,24 +0,0 @@ - -/* Module object interface */ - -#ifndef Py_MODULEOBJECT_H -#define Py_MODULEOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_DATA(PyTypeObject) PyModule_Type; - -#define PyModule_Check(op) PyObject_TypeCheck(op, &PyModule_Type) -#define PyModule_CheckExact(op) (Py_TYPE(op) == &PyModule_Type) - -PyAPI_FUNC(PyObject *) PyModule_New(const char *); -PyAPI_FUNC(PyObject *) PyModule_GetDict(PyObject *); -PyAPI_FUNC(char *) PyModule_GetName(PyObject *); -PyAPI_FUNC(char *) PyModule_GetFilename(PyObject *); -PyAPI_FUNC(void) _PyModule_Clear(PyObject *); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_MODULEOBJECT_H */
--- a/test/include/python2.7/node.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ - -/* Parse tree node interface */ - -#ifndef Py_NODE_H -#define Py_NODE_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct _node { - short n_type; - char *n_str; - int n_lineno; - int n_col_offset; - int n_nchildren; - struct _node *n_child; -} node; - -PyAPI_FUNC(node *) PyNode_New(int type); -PyAPI_FUNC(int) PyNode_AddChild(node *n, int type, - char *str, int lineno, int col_offset); -PyAPI_FUNC(void) PyNode_Free(node *n); -PyAPI_FUNC(Py_ssize_t) _PyNode_SizeOf(node *n); - -/* Node access functions */ -#define NCH(n) ((n)->n_nchildren) - -#define CHILD(n, i) (&(n)->n_child[i]) -#define RCHILD(n, i) (CHILD(n, NCH(n) + i)) -#define TYPE(n) ((n)->n_type) -#define STR(n) ((n)->n_str) - -/* Assert that the type of a node is what we expect */ -#define REQ(n, type) assert(TYPE(n) == (type)) - -PyAPI_FUNC(void) PyNode_ListTree(node *); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_NODE_H */
--- a/test/include/python2.7/object.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1046 +0,0 @@ -#ifndef Py_OBJECT_H -#define Py_OBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* Object and type object interface */ - -/* -Objects are structures allocated on the heap. Special rules apply to -the use of objects to ensure they are properly garbage-collected. -Objects are never allocated statically or on the stack; they must be -accessed through special macros and functions only. (Type objects are -exceptions to the first rule; the standard types are represented by -statically initialized type objects, although work on type/class unification -for Python 2.2 made it possible to have heap-allocated type objects too). - -An object has a 'reference count' that is increased or decreased when a -pointer to the object is copied or deleted; when the reference count -reaches zero there are no references to the object left and it can be -removed from the heap. - -An object has a 'type' that determines what it represents and what kind -of data it contains. An object's type is fixed when it is created. -Types themselves are represented as objects; an object contains a -pointer to the corresponding type object. The type itself has a type -pointer pointing to the object representing the type 'type', which -contains a pointer to itself!). - -Objects do not float around in memory; once allocated an object keeps -the same size and address. Objects that must hold variable-size data -can contain pointers to variable-size parts of the object. Not all -objects of the same type have the same size; but the size cannot change -after allocation. (These restrictions are made so a reference to an -object can be simply a pointer -- moving an object would require -updating all the pointers, and changing an object's size would require -moving it if there was another object right next to it.) - -Objects are always accessed through pointers of the type 'PyObject *'. -The type 'PyObject' is a structure that only contains the reference count -and the type pointer. The actual memory allocated for an object -contains other data that can only be accessed after casting the pointer -to a pointer to a longer structure type. This longer type must start -with the reference count and type fields; the macro PyObject_HEAD should be -used for this (to accommodate for future changes). The implementation -of a particular object type can cast the object pointer to the proper -type and back. - -A standard interface exists for objects that contain an array of items -whose size is determined when the object is allocated. -*/ - -/* Py_DEBUG implies Py_TRACE_REFS. */ -#if defined(Py_DEBUG) && !defined(Py_TRACE_REFS) -#define Py_TRACE_REFS -#endif - -/* Py_TRACE_REFS implies Py_REF_DEBUG. */ -#if defined(Py_TRACE_REFS) && !defined(Py_REF_DEBUG) -#define Py_REF_DEBUG -#endif - -#ifdef Py_TRACE_REFS -/* Define pointers to support a doubly-linked list of all live heap objects. */ -#define _PyObject_HEAD_EXTRA \ - struct _object *_ob_next; \ - struct _object *_ob_prev; - -#define _PyObject_EXTRA_INIT 0, 0, - -#else -#define _PyObject_HEAD_EXTRA -#define _PyObject_EXTRA_INIT -#endif - -/* PyObject_HEAD defines the initial segment of every PyObject. */ -#define PyObject_HEAD \ - _PyObject_HEAD_EXTRA \ - Py_ssize_t ob_refcnt; \ - struct _typeobject *ob_type; - -#define PyObject_HEAD_INIT(type) \ - _PyObject_EXTRA_INIT \ - 1, type, - -#define PyVarObject_HEAD_INIT(type, size) \ - PyObject_HEAD_INIT(type) size, - -/* PyObject_VAR_HEAD defines the initial segment of all variable-size - * container objects. These end with a declaration of an array with 1 - * element, but enough space is malloc'ed so that the array actually - * has room for ob_size elements. Note that ob_size is an element count, - * not necessarily a byte count. - */ -#define PyObject_VAR_HEAD \ - PyObject_HEAD \ - Py_ssize_t ob_size; /* Number of items in variable part */ -#define Py_INVALID_SIZE (Py_ssize_t)-1 - -/* Nothing is actually declared to be a PyObject, but every pointer to - * a Python object can be cast to a PyObject*. This is inheritance built - * by hand. Similarly every pointer to a variable-size Python object can, - * in addition, be cast to PyVarObject*. - */ -typedef struct _object { - PyObject_HEAD -} PyObject; - -typedef struct { - PyObject_VAR_HEAD -} PyVarObject; - -#define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt) -#define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) -#define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) - -/* -Type objects contain a string containing the type name (to help somewhat -in debugging), the allocation parameters (see PyObject_New() and -PyObject_NewVar()), -and methods for accessing objects of the type. Methods are optional, a -nil pointer meaning that particular kind of access is not available for -this type. The Py_DECREF() macro uses the tp_dealloc method without -checking for a nil pointer; it should always be implemented except if -the implementation can guarantee that the reference count will never -reach zero (e.g., for statically allocated type objects). - -NB: the methods for certain type groups are now contained in separate -method blocks. -*/ - -typedef PyObject * (*unaryfunc)(PyObject *); -typedef PyObject * (*binaryfunc)(PyObject *, PyObject *); -typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); -typedef int (*inquiry)(PyObject *); -typedef Py_ssize_t (*lenfunc)(PyObject *); -typedef int (*coercion)(PyObject **, PyObject **); -typedef PyObject *(*intargfunc)(PyObject *, int) Py_DEPRECATED(2.5); -typedef PyObject *(*intintargfunc)(PyObject *, int, int) Py_DEPRECATED(2.5); -typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t); -typedef PyObject *(*ssizessizeargfunc)(PyObject *, Py_ssize_t, Py_ssize_t); -typedef int(*intobjargproc)(PyObject *, int, PyObject *); -typedef int(*intintobjargproc)(PyObject *, int, int, PyObject *); -typedef int(*ssizeobjargproc)(PyObject *, Py_ssize_t, PyObject *); -typedef int(*ssizessizeobjargproc)(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *); -typedef int(*objobjargproc)(PyObject *, PyObject *, PyObject *); - - - -/* int-based buffer interface */ -typedef int (*getreadbufferproc)(PyObject *, int, void **); -typedef int (*getwritebufferproc)(PyObject *, int, void **); -typedef int (*getsegcountproc)(PyObject *, int *); -typedef int (*getcharbufferproc)(PyObject *, int, char **); -/* ssize_t-based buffer interface */ -typedef Py_ssize_t (*readbufferproc)(PyObject *, Py_ssize_t, void **); -typedef Py_ssize_t (*writebufferproc)(PyObject *, Py_ssize_t, void **); -typedef Py_ssize_t (*segcountproc)(PyObject *, Py_ssize_t *); -typedef Py_ssize_t (*charbufferproc)(PyObject *, Py_ssize_t, char **); - - -/* Py3k buffer interface */ -typedef struct bufferinfo { - void *buf; - PyObject *obj; /* owned reference */ - Py_ssize_t len; - Py_ssize_t itemsize; /* This is Py_ssize_t so it can be - pointed to by strides in simple case.*/ - int readonly; - int ndim; - char *format; - Py_ssize_t *shape; - Py_ssize_t *strides; - Py_ssize_t *suboffsets; - Py_ssize_t smalltable[2]; /* static store for shape and strides of - mono-dimensional buffers. */ - void *internal; -} Py_buffer; - -typedef int (*getbufferproc)(PyObject *, Py_buffer *, int); -typedef void (*releasebufferproc)(PyObject *, Py_buffer *); - - /* Flags for getting buffers */ -#define PyBUF_SIMPLE 0 -#define PyBUF_WRITABLE 0x0001 -/* we used to include an E, backwards compatible alias */ -#define PyBUF_WRITEABLE PyBUF_WRITABLE -#define PyBUF_FORMAT 0x0004 -#define PyBUF_ND 0x0008 -#define PyBUF_STRIDES (0x0010 | PyBUF_ND) -#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES) -#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES) -#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES) -#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES) - -#define PyBUF_CONTIG (PyBUF_ND | PyBUF_WRITABLE) -#define PyBUF_CONTIG_RO (PyBUF_ND) - -#define PyBUF_STRIDED (PyBUF_STRIDES | PyBUF_WRITABLE) -#define PyBUF_STRIDED_RO (PyBUF_STRIDES) - -#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_WRITABLE | PyBUF_FORMAT) -#define PyBUF_RECORDS_RO (PyBUF_STRIDES | PyBUF_FORMAT) - -#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_WRITABLE | PyBUF_FORMAT) -#define PyBUF_FULL_RO (PyBUF_INDIRECT | PyBUF_FORMAT) - - -#define PyBUF_READ 0x100 -#define PyBUF_WRITE 0x200 -#define PyBUF_SHADOW 0x400 -/* end Py3k buffer interface */ - -typedef int (*objobjproc)(PyObject *, PyObject *); -typedef int (*visitproc)(PyObject *, void *); -typedef int (*traverseproc)(PyObject *, visitproc, void *); - -typedef struct { - /* For numbers without flag bit Py_TPFLAGS_CHECKTYPES set, all - arguments are guaranteed to be of the object's type (modulo - coercion hacks -- i.e. if the type's coercion function - returns other types, then these are allowed as well). Numbers that - have the Py_TPFLAGS_CHECKTYPES flag bit set should check *both* - arguments for proper type and implement the necessary conversions - in the slot functions themselves. */ - - binaryfunc nb_add; - binaryfunc nb_subtract; - binaryfunc nb_multiply; - binaryfunc nb_divide; - binaryfunc nb_remainder; - binaryfunc nb_divmod; - ternaryfunc nb_power; - unaryfunc nb_negative; - unaryfunc nb_positive; - unaryfunc nb_absolute; - inquiry nb_nonzero; - unaryfunc nb_invert; - binaryfunc nb_lshift; - binaryfunc nb_rshift; - binaryfunc nb_and; - binaryfunc nb_xor; - binaryfunc nb_or; - coercion nb_coerce; - unaryfunc nb_int; - unaryfunc nb_long; - unaryfunc nb_float; - unaryfunc nb_oct; - unaryfunc nb_hex; - /* Added in release 2.0 */ - binaryfunc nb_inplace_add; - binaryfunc nb_inplace_subtract; - binaryfunc nb_inplace_multiply; - binaryfunc nb_inplace_divide; - binaryfunc nb_inplace_remainder; - ternaryfunc nb_inplace_power; - binaryfunc nb_inplace_lshift; - binaryfunc nb_inplace_rshift; - binaryfunc nb_inplace_and; - binaryfunc nb_inplace_xor; - binaryfunc nb_inplace_or; - - /* Added in release 2.2 */ - /* The following require the Py_TPFLAGS_HAVE_CLASS flag */ - binaryfunc nb_floor_divide; - binaryfunc nb_true_divide; - binaryfunc nb_inplace_floor_divide; - binaryfunc nb_inplace_true_divide; - - /* Added in release 2.5 */ - unaryfunc nb_index; -} PyNumberMethods; - -typedef struct { - lenfunc sq_length; - binaryfunc sq_concat; - ssizeargfunc sq_repeat; - ssizeargfunc sq_item; - ssizessizeargfunc sq_slice; - ssizeobjargproc sq_ass_item; - ssizessizeobjargproc sq_ass_slice; - objobjproc sq_contains; - /* Added in release 2.0 */ - binaryfunc sq_inplace_concat; - ssizeargfunc sq_inplace_repeat; -} PySequenceMethods; - -typedef struct { - lenfunc mp_length; - binaryfunc mp_subscript; - objobjargproc mp_ass_subscript; -} PyMappingMethods; - -typedef struct { - readbufferproc bf_getreadbuffer; - writebufferproc bf_getwritebuffer; - segcountproc bf_getsegcount; - charbufferproc bf_getcharbuffer; - getbufferproc bf_getbuffer; - releasebufferproc bf_releasebuffer; -} PyBufferProcs; - - -typedef void (*freefunc)(void *); -typedef void (*destructor)(PyObject *); -typedef int (*printfunc)(PyObject *, FILE *, int); -typedef PyObject *(*getattrfunc)(PyObject *, char *); -typedef PyObject *(*getattrofunc)(PyObject *, PyObject *); -typedef int (*setattrfunc)(PyObject *, char *, PyObject *); -typedef int (*setattrofunc)(PyObject *, PyObject *, PyObject *); -typedef int (*cmpfunc)(PyObject *, PyObject *); -typedef PyObject *(*reprfunc)(PyObject *); -typedef long (*hashfunc)(PyObject *); -typedef PyObject *(*richcmpfunc) (PyObject *, PyObject *, int); -typedef PyObject *(*getiterfunc) (PyObject *); -typedef PyObject *(*iternextfunc) (PyObject *); -typedef PyObject *(*descrgetfunc) (PyObject *, PyObject *, PyObject *); -typedef int (*descrsetfunc) (PyObject *, PyObject *, PyObject *); -typedef int (*initproc)(PyObject *, PyObject *, PyObject *); -typedef PyObject *(*newfunc)(struct _typeobject *, PyObject *, PyObject *); -typedef PyObject *(*allocfunc)(struct _typeobject *, Py_ssize_t); - -typedef struct _typeobject { - PyObject_VAR_HEAD - const char *tp_name; /* For printing, in format "<module>.<name>" */ - Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */ - - /* Methods to implement standard operations */ - - destructor tp_dealloc; - printfunc tp_print; - getattrfunc tp_getattr; - setattrfunc tp_setattr; - cmpfunc tp_compare; - reprfunc tp_repr; - - /* Method suites for standard classes */ - - PyNumberMethods *tp_as_number; - PySequenceMethods *tp_as_sequence; - PyMappingMethods *tp_as_mapping; - - /* More standard operations (here for binary compatibility) */ - - hashfunc tp_hash; - ternaryfunc tp_call; - reprfunc tp_str; - getattrofunc tp_getattro; - setattrofunc tp_setattro; - - /* Functions to access object as input/output buffer */ - PyBufferProcs *tp_as_buffer; - - /* Flags to define presence of optional/expanded features */ - long tp_flags; - - const char *tp_doc; /* Documentation string */ - - /* Assigned meaning in release 2.0 */ - /* call function for all accessible objects */ - traverseproc tp_traverse; - - /* delete references to contained objects */ - inquiry tp_clear; - - /* Assigned meaning in release 2.1 */ - /* rich comparisons */ - richcmpfunc tp_richcompare; - - /* weak reference enabler */ - Py_ssize_t tp_weaklistoffset; - - /* Added in release 2.2 */ - /* Iterators */ - getiterfunc tp_iter; - iternextfunc tp_iternext; - - /* Attribute descriptor and subclassing stuff */ - struct PyMethodDef *tp_methods; - struct PyMemberDef *tp_members; - struct PyGetSetDef *tp_getset; - struct _typeobject *tp_base; - PyObject *tp_dict; - descrgetfunc tp_descr_get; - descrsetfunc tp_descr_set; - Py_ssize_t tp_dictoffset; - initproc tp_init; - allocfunc tp_alloc; - newfunc tp_new; - freefunc tp_free; /* Low-level free-memory routine */ - inquiry tp_is_gc; /* For PyObject_IS_GC */ - PyObject *tp_bases; - PyObject *tp_mro; /* method resolution order */ - PyObject *tp_cache; - PyObject *tp_subclasses; - PyObject *tp_weaklist; - destructor tp_del; - - /* Type attribute cache version tag. Added in version 2.6 */ - unsigned int tp_version_tag; - -#ifdef COUNT_ALLOCS - /* these must be last and never explicitly initialized */ - Py_ssize_t tp_allocs; - Py_ssize_t tp_frees; - Py_ssize_t tp_maxalloc; - struct _typeobject *tp_prev; - struct _typeobject *tp_next; -#endif -} PyTypeObject; - - -/* The *real* layout of a type object when allocated on the heap */ -typedef struct _heaptypeobject { - /* Note: there's a dependency on the order of these members - in slotptr() in typeobject.c . */ - PyTypeObject ht_type; - PyNumberMethods as_number; - PyMappingMethods as_mapping; - PySequenceMethods as_sequence; /* as_sequence comes after as_mapping, - so that the mapping wins when both - the mapping and the sequence define - a given operator (e.g. __getitem__). - see add_operators() in typeobject.c . */ - PyBufferProcs as_buffer; - PyObject *ht_name, *ht_slots; - /* here are optional user slots, followed by the members. */ -} PyHeapTypeObject; - -/* access macro to the members which are floating "behind" the object */ -#define PyHeapType_GET_MEMBERS(etype) \ - ((PyMemberDef *)(((char *)etype) + Py_TYPE(etype)->tp_basicsize)) - - -/* Generic type check */ -PyAPI_FUNC(int) PyType_IsSubtype(PyTypeObject *, PyTypeObject *); -#define PyObject_TypeCheck(ob, tp) \ - (Py_TYPE(ob) == (tp) || PyType_IsSubtype(Py_TYPE(ob), (tp))) - -PyAPI_DATA(PyTypeObject) PyType_Type; /* built-in 'type' */ -PyAPI_DATA(PyTypeObject) PyBaseObject_Type; /* built-in 'object' */ -PyAPI_DATA(PyTypeObject) PySuper_Type; /* built-in 'super' */ - -#define PyType_Check(op) \ - PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_TYPE_SUBCLASS) -#define PyType_CheckExact(op) (Py_TYPE(op) == &PyType_Type) - -PyAPI_FUNC(int) PyType_Ready(PyTypeObject *); -PyAPI_FUNC(PyObject *) PyType_GenericAlloc(PyTypeObject *, Py_ssize_t); -PyAPI_FUNC(PyObject *) PyType_GenericNew(PyTypeObject *, - PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *); -PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, char *, PyObject **); -PyAPI_FUNC(unsigned int) PyType_ClearCache(void); -PyAPI_FUNC(void) PyType_Modified(PyTypeObject *); - -/* Generic operations on objects */ -PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int); -PyAPI_FUNC(void) _PyObject_Dump(PyObject *); -PyAPI_FUNC(PyObject *) PyObject_Repr(PyObject *); -PyAPI_FUNC(PyObject *) _PyObject_Str(PyObject *); -PyAPI_FUNC(PyObject *) PyObject_Str(PyObject *); -#define PyObject_Bytes PyObject_Str -#ifdef Py_USING_UNICODE -PyAPI_FUNC(PyObject *) PyObject_Unicode(PyObject *); -#endif -PyAPI_FUNC(int) PyObject_Compare(PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) PyObject_RichCompare(PyObject *, PyObject *, int); -PyAPI_FUNC(int) PyObject_RichCompareBool(PyObject *, PyObject *, int); -PyAPI_FUNC(PyObject *) PyObject_GetAttrString(PyObject *, const char *); -PyAPI_FUNC(int) PyObject_SetAttrString(PyObject *, const char *, PyObject *); -PyAPI_FUNC(int) PyObject_HasAttrString(PyObject *, const char *); -PyAPI_FUNC(PyObject *) PyObject_GetAttr(PyObject *, PyObject *); -PyAPI_FUNC(int) PyObject_SetAttr(PyObject *, PyObject *, PyObject *); -PyAPI_FUNC(int) PyObject_HasAttr(PyObject *, PyObject *); -PyAPI_FUNC(PyObject **) _PyObject_GetDictPtr(PyObject *); -PyAPI_FUNC(PyObject *) PyObject_SelfIter(PyObject *); -PyAPI_FUNC(PyObject *) _PyObject_NextNotImplemented(PyObject *); -PyAPI_FUNC(PyObject *) PyObject_GenericGetAttr(PyObject *, PyObject *); -PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *, - PyObject *, PyObject *); -PyAPI_FUNC(long) PyObject_Hash(PyObject *); -PyAPI_FUNC(long) PyObject_HashNotImplemented(PyObject *); -PyAPI_FUNC(int) PyObject_IsTrue(PyObject *); -PyAPI_FUNC(int) PyObject_Not(PyObject *); -PyAPI_FUNC(int) PyCallable_Check(PyObject *); -PyAPI_FUNC(int) PyNumber_Coerce(PyObject **, PyObject **); -PyAPI_FUNC(int) PyNumber_CoerceEx(PyObject **, PyObject **); - -PyAPI_FUNC(void) PyObject_ClearWeakRefs(PyObject *); - -/* A slot function whose address we need to compare */ -extern int _PyObject_SlotCompare(PyObject *, PyObject *); -/* Same as PyObject_Generic{Get,Set}Attr, but passing the attributes - dict as the last parameter. */ -PyAPI_FUNC(PyObject *) -_PyObject_GenericGetAttrWithDict(PyObject *, PyObject *, PyObject *); -PyAPI_FUNC(int) -_PyObject_GenericSetAttrWithDict(PyObject *, PyObject *, - PyObject *, PyObject *); - - -/* PyObject_Dir(obj) acts like Python __builtin__.dir(obj), returning a - list of strings. PyObject_Dir(NULL) is like __builtin__.dir(), - returning the names of the current locals. In this case, if there are - no current locals, NULL is returned, and PyErr_Occurred() is false. -*/ -PyAPI_FUNC(PyObject *) PyObject_Dir(PyObject *); - - -/* Helpers for printing recursive container types */ -PyAPI_FUNC(int) Py_ReprEnter(PyObject *); -PyAPI_FUNC(void) Py_ReprLeave(PyObject *); - -/* Helpers for hash functions */ -PyAPI_FUNC(long) _Py_HashDouble(double); -PyAPI_FUNC(long) _Py_HashPointer(void*); - -typedef struct { - long prefix; - long suffix; -} _Py_HashSecret_t; -PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; - -#ifdef Py_DEBUG -PyAPI_DATA(int) _Py_HashSecret_Initialized; -#endif - -/* Helper for passing objects to printf and the like. - Leaks refcounts. Don't use it! -*/ -#define PyObject_REPR(obj) PyString_AS_STRING(PyObject_Repr(obj)) - -/* Flag bits for printing: */ -#define Py_PRINT_RAW 1 /* No string quotes etc. */ - -/* -`Type flags (tp_flags) - -These flags are used to extend the type structure in a backwards-compatible -fashion. Extensions can use the flags to indicate (and test) when a given -type structure contains a new feature. The Python core will use these when -introducing new functionality between major revisions (to avoid mid-version -changes in the PYTHON_API_VERSION). - -Arbitration of the flag bit positions will need to be coordinated among -all extension writers who publically release their extensions (this will -be fewer than you might expect!).. - -Python 1.5.2 introduced the bf_getcharbuffer slot into PyBufferProcs. - -Type definitions should use Py_TPFLAGS_DEFAULT for their tp_flags value. - -Code can use PyType_HasFeature(type_ob, flag_value) to test whether the -given type object has a specified feature. - -NOTE: when building the core, Py_TPFLAGS_DEFAULT includes -Py_TPFLAGS_HAVE_VERSION_TAG; outside the core, it doesn't. This is so -that extensions that modify tp_dict of their own types directly don't -break, since this was allowed in 2.5. In 3.0 they will have to -manually remove this flag though! -*/ - -/* PyBufferProcs contains bf_getcharbuffer */ -#define Py_TPFLAGS_HAVE_GETCHARBUFFER (1L<<0) - -/* PySequenceMethods contains sq_contains */ -#define Py_TPFLAGS_HAVE_SEQUENCE_IN (1L<<1) - -/* This is here for backwards compatibility. Extensions that use the old GC - * API will still compile but the objects will not be tracked by the GC. */ -#define Py_TPFLAGS_GC 0 /* used to be (1L<<2) */ - -/* PySequenceMethods and PyNumberMethods contain in-place operators */ -#define Py_TPFLAGS_HAVE_INPLACEOPS (1L<<3) - -/* PyNumberMethods do their own coercion */ -#define Py_TPFLAGS_CHECKTYPES (1L<<4) - -/* tp_richcompare is defined */ -#define Py_TPFLAGS_HAVE_RICHCOMPARE (1L<<5) - -/* Objects which are weakly referencable if their tp_weaklistoffset is >0 */ -#define Py_TPFLAGS_HAVE_WEAKREFS (1L<<6) - -/* tp_iter is defined */ -#define Py_TPFLAGS_HAVE_ITER (1L<<7) - -/* New members introduced by Python 2.2 exist */ -#define Py_TPFLAGS_HAVE_CLASS (1L<<8) - -/* Set if the type object is dynamically allocated */ -#define Py_TPFLAGS_HEAPTYPE (1L<<9) - -/* Set if the type allows subclassing */ -#define Py_TPFLAGS_BASETYPE (1L<<10) - -/* Set if the type is 'ready' -- fully initialized */ -#define Py_TPFLAGS_READY (1L<<12) - -/* Set while the type is being 'readied', to prevent recursive ready calls */ -#define Py_TPFLAGS_READYING (1L<<13) - -/* Objects support garbage collection (see objimp.h) */ -#define Py_TPFLAGS_HAVE_GC (1L<<14) - -/* These two bits are preserved for Stackless Python, next after this is 17 */ -#ifdef STACKLESS -#define Py_TPFLAGS_HAVE_STACKLESS_EXTENSION (3L<<15) -#else -#define Py_TPFLAGS_HAVE_STACKLESS_EXTENSION 0 -#endif - -/* Objects support nb_index in PyNumberMethods */ -#define Py_TPFLAGS_HAVE_INDEX (1L<<17) - -/* Objects support type attribute cache */ -#define Py_TPFLAGS_HAVE_VERSION_TAG (1L<<18) -#define Py_TPFLAGS_VALID_VERSION_TAG (1L<<19) - -/* Type is abstract and cannot be instantiated */ -#define Py_TPFLAGS_IS_ABSTRACT (1L<<20) - -/* Has the new buffer protocol */ -#define Py_TPFLAGS_HAVE_NEWBUFFER (1L<<21) - -/* These flags are used to determine if a type is a subclass. */ -#define Py_TPFLAGS_INT_SUBCLASS (1L<<23) -#define Py_TPFLAGS_LONG_SUBCLASS (1L<<24) -#define Py_TPFLAGS_LIST_SUBCLASS (1L<<25) -#define Py_TPFLAGS_TUPLE_SUBCLASS (1L<<26) -#define Py_TPFLAGS_STRING_SUBCLASS (1L<<27) -#define Py_TPFLAGS_UNICODE_SUBCLASS (1L<<28) -#define Py_TPFLAGS_DICT_SUBCLASS (1L<<29) -#define Py_TPFLAGS_BASE_EXC_SUBCLASS (1L<<30) -#define Py_TPFLAGS_TYPE_SUBCLASS (1L<<31) - -#define Py_TPFLAGS_DEFAULT_EXTERNAL ( \ - Py_TPFLAGS_HAVE_GETCHARBUFFER | \ - Py_TPFLAGS_HAVE_SEQUENCE_IN | \ - Py_TPFLAGS_HAVE_INPLACEOPS | \ - Py_TPFLAGS_HAVE_RICHCOMPARE | \ - Py_TPFLAGS_HAVE_WEAKREFS | \ - Py_TPFLAGS_HAVE_ITER | \ - Py_TPFLAGS_HAVE_CLASS | \ - Py_TPFLAGS_HAVE_STACKLESS_EXTENSION | \ - Py_TPFLAGS_HAVE_INDEX | \ - 0) -#define Py_TPFLAGS_DEFAULT_CORE (Py_TPFLAGS_DEFAULT_EXTERNAL | \ - Py_TPFLAGS_HAVE_VERSION_TAG) - -#ifdef Py_BUILD_CORE -#define Py_TPFLAGS_DEFAULT Py_TPFLAGS_DEFAULT_CORE -#else -#define Py_TPFLAGS_DEFAULT Py_TPFLAGS_DEFAULT_EXTERNAL -#endif - -#define PyType_HasFeature(t,f) (((t)->tp_flags & (f)) != 0) -#define PyType_FastSubclass(t,f) PyType_HasFeature(t,f) - - -/* -The macros Py_INCREF(op) and Py_DECREF(op) are used to increment or decrement -reference counts. Py_DECREF calls the object's deallocator function when -the refcount falls to 0; for -objects that don't contain references to other objects or heap memory -this can be the standard function free(). Both macros can be used -wherever a void expression is allowed. The argument must not be a -NULL pointer. If it may be NULL, use Py_XINCREF/Py_XDECREF instead. -The macro _Py_NewReference(op) initialize reference counts to 1, and -in special builds (Py_REF_DEBUG, Py_TRACE_REFS) performs additional -bookkeeping appropriate to the special build. - -We assume that the reference count field can never overflow; this can -be proven when the size of the field is the same as the pointer size, so -we ignore the possibility. Provided a C int is at least 32 bits (which -is implicitly assumed in many parts of this code), that's enough for -about 2**31 references to an object. - -XXX The following became out of date in Python 2.2, but I'm not sure -XXX what the full truth is now. Certainly, heap-allocated type objects -XXX can and should be deallocated. -Type objects should never be deallocated; the type pointer in an object -is not considered to be a reference to the type object, to save -complications in the deallocation function. (This is actually a -decision that's up to the implementer of each new type so if you want, -you can count such references to the type object.) - -*** WARNING*** The Py_DECREF macro must have a side-effect-free argument -since it may evaluate its argument multiple times. (The alternative -would be to mace it a proper function or assign it to a global temporary -variable first, both of which are slower; and in a multi-threaded -environment the global variable trick is not safe.) -*/ - -/* First define a pile of simple helper macros, one set per special - * build symbol. These either expand to the obvious things, or to - * nothing at all when the special mode isn't in effect. The main - * macros can later be defined just once then, yet expand to different - * things depending on which special build options are and aren't in effect. - * Trust me <wink>: while painful, this is 20x easier to understand than, - * e.g, defining _Py_NewReference five different times in a maze of nested - * #ifdefs (we used to do that -- it was impenetrable). - */ -#ifdef Py_REF_DEBUG -PyAPI_DATA(Py_ssize_t) _Py_RefTotal; -PyAPI_FUNC(void) _Py_NegativeRefcount(const char *fname, - int lineno, PyObject *op); -PyAPI_FUNC(PyObject *) _PyDict_Dummy(void); -PyAPI_FUNC(PyObject *) _PySet_Dummy(void); -PyAPI_FUNC(Py_ssize_t) _Py_GetRefTotal(void); -#define _Py_INC_REFTOTAL _Py_RefTotal++ -#define _Py_DEC_REFTOTAL _Py_RefTotal-- -#define _Py_REF_DEBUG_COMMA , -#define _Py_CHECK_REFCNT(OP) \ -{ if (((PyObject*)OP)->ob_refcnt < 0) \ - _Py_NegativeRefcount(__FILE__, __LINE__, \ - (PyObject *)(OP)); \ -} -#else -#define _Py_INC_REFTOTAL -#define _Py_DEC_REFTOTAL -#define _Py_REF_DEBUG_COMMA -#define _Py_CHECK_REFCNT(OP) /* a semicolon */; -#endif /* Py_REF_DEBUG */ - -#ifdef COUNT_ALLOCS -PyAPI_FUNC(void) inc_count(PyTypeObject *); -PyAPI_FUNC(void) dec_count(PyTypeObject *); -#define _Py_INC_TPALLOCS(OP) inc_count(Py_TYPE(OP)) -#define _Py_INC_TPFREES(OP) dec_count(Py_TYPE(OP)) -#define _Py_DEC_TPFREES(OP) Py_TYPE(OP)->tp_frees-- -#define _Py_COUNT_ALLOCS_COMMA , -#else -#define _Py_INC_TPALLOCS(OP) -#define _Py_INC_TPFREES(OP) -#define _Py_DEC_TPFREES(OP) -#define _Py_COUNT_ALLOCS_COMMA -#endif /* COUNT_ALLOCS */ - -#ifdef Py_TRACE_REFS -/* Py_TRACE_REFS is such major surgery that we call external routines. */ -PyAPI_FUNC(void) _Py_NewReference(PyObject *); -PyAPI_FUNC(void) _Py_ForgetReference(PyObject *); -PyAPI_FUNC(void) _Py_Dealloc(PyObject *); -PyAPI_FUNC(void) _Py_PrintReferences(FILE *); -PyAPI_FUNC(void) _Py_PrintReferenceAddresses(FILE *); -PyAPI_FUNC(void) _Py_AddToAllObjects(PyObject *, int force); - -#else -/* Without Py_TRACE_REFS, there's little enough to do that we expand code - * inline. - */ -#define _Py_NewReference(op) ( \ - _Py_INC_TPALLOCS(op) _Py_COUNT_ALLOCS_COMMA \ - _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA \ - Py_REFCNT(op) = 1) - -#define _Py_ForgetReference(op) _Py_INC_TPFREES(op) - -#define _Py_Dealloc(op) ( \ - _Py_INC_TPFREES(op) _Py_COUNT_ALLOCS_COMMA \ - (*Py_TYPE(op)->tp_dealloc)((PyObject *)(op))) -#endif /* !Py_TRACE_REFS */ - -#define Py_INCREF(op) ( \ - _Py_INC_REFTOTAL _Py_REF_DEBUG_COMMA \ - ((PyObject*)(op))->ob_refcnt++) - -#define Py_DECREF(op) \ - do { \ - if (_Py_DEC_REFTOTAL _Py_REF_DEBUG_COMMA \ - --((PyObject*)(op))->ob_refcnt != 0) \ - _Py_CHECK_REFCNT(op) \ - else \ - _Py_Dealloc((PyObject *)(op)); \ - } while (0) - -/* Safely decref `op` and set `op` to NULL, especially useful in tp_clear - * and tp_dealloc implementatons. - * - * Note that "the obvious" code can be deadly: - * - * Py_XDECREF(op); - * op = NULL; - * - * Typically, `op` is something like self->containee, and `self` is done - * using its `containee` member. In the code sequence above, suppose - * `containee` is non-NULL with a refcount of 1. Its refcount falls to - * 0 on the first line, which can trigger an arbitrary amount of code, - * possibly including finalizers (like __del__ methods or weakref callbacks) - * coded in Python, which in turn can release the GIL and allow other threads - * to run, etc. Such code may even invoke methods of `self` again, or cause - * cyclic gc to trigger, but-- oops! --self->containee still points to the - * object being torn down, and it may be in an insane state while being torn - * down. This has in fact been a rich historic source of miserable (rare & - * hard-to-diagnose) segfaulting (and other) bugs. - * - * The safe way is: - * - * Py_CLEAR(op); - * - * That arranges to set `op` to NULL _before_ decref'ing, so that any code - * triggered as a side-effect of `op` getting torn down no longer believes - * `op` points to a valid object. - * - * There are cases where it's safe to use the naive code, but they're brittle. - * For example, if `op` points to a Python integer, you know that destroying - * one of those can't cause problems -- but in part that relies on that - * Python integers aren't currently weakly referencable. Best practice is - * to use Py_CLEAR() even if you can't think of a reason for why you need to. - */ -#define Py_CLEAR(op) \ - do { \ - if (op) { \ - PyObject *_py_tmp = (PyObject *)(op); \ - (op) = NULL; \ - Py_DECREF(_py_tmp); \ - } \ - } while (0) - -/* Macros to use in case the object pointer may be NULL: */ -#define Py_XINCREF(op) do { if ((op) == NULL) ; else Py_INCREF(op); } while (0) -#define Py_XDECREF(op) do { if ((op) == NULL) ; else Py_DECREF(op); } while (0) - -/* Safely decref `op` and set `op` to `op2`. - * - * As in case of Py_CLEAR "the obvious" code can be deadly: - * - * Py_DECREF(op); - * op = op2; - * - * The safe way is: - * - * Py_SETREF(op, op2); - * - * That arranges to set `op` to `op2` _before_ decref'ing, so that any code - * triggered as a side-effect of `op` getting torn down no longer believes - * `op` points to a valid object. - * - * Py_XSETREF is a variant of Py_SETREF that uses Py_XDECREF instead of - * Py_DECREF. - */ - -#define Py_SETREF(op, op2) \ - do { \ - PyObject *_py_tmp = (PyObject *)(op); \ - (op) = (op2); \ - Py_DECREF(_py_tmp); \ - } while (0) - -#define Py_XSETREF(op, op2) \ - do { \ - PyObject *_py_tmp = (PyObject *)(op); \ - (op) = (op2); \ - Py_XDECREF(_py_tmp); \ - } while (0) - -/* -These are provided as conveniences to Python runtime embedders, so that -they can have object code that is not dependent on Python compilation flags. -*/ -PyAPI_FUNC(void) Py_IncRef(PyObject *); -PyAPI_FUNC(void) Py_DecRef(PyObject *); - -/* -_Py_NoneStruct is an object of undefined type which can be used in contexts -where NULL (nil) is not suitable (since NULL often means 'error'). - -Don't forget to apply Py_INCREF() when returning this value!!! -*/ -PyAPI_DATA(PyObject) _Py_NoneStruct; /* Don't use this directly */ -#define Py_None (&_Py_NoneStruct) - -/* Macro for returning Py_None from a function */ -#define Py_RETURN_NONE return Py_INCREF(Py_None), Py_None - -/* -Py_NotImplemented is a singleton used to signal that an operation is -not implemented for a given type combination. -*/ -PyAPI_DATA(PyObject) _Py_NotImplementedStruct; /* Don't use this directly */ -#define Py_NotImplemented (&_Py_NotImplementedStruct) - -/* Rich comparison opcodes */ -#define Py_LT 0 -#define Py_LE 1 -#define Py_EQ 2 -#define Py_NE 3 -#define Py_GT 4 -#define Py_GE 5 - -/* Maps Py_LT to Py_GT, ..., Py_GE to Py_LE. - * Defined in object.c. - */ -PyAPI_DATA(int) _Py_SwappedOp[]; - -/* -Define staticforward and statichere for source compatibility with old -C extensions. - -The staticforward define was needed to support certain broken C -compilers (notably SCO ODT 3.0, perhaps early AIX as well) botched the -static keyword when it was used with a forward declaration of a static -initialized structure. Standard C allows the forward declaration with -static, and we've decided to stop catering to broken C compilers. -(In fact, we expect that the compilers are all fixed eight years later.) -*/ - -#define staticforward static -#define statichere static - - -/* -More conventions -================ - -Argument Checking ------------------ - -Functions that take objects as arguments normally don't check for nil -arguments, but they do check the type of the argument, and return an -error if the function doesn't apply to the type. - -Failure Modes -------------- - -Functions may fail for a variety of reasons, including running out of -memory. This is communicated to the caller in two ways: an error string -is set (see errors.h), and the function result differs: functions that -normally return a pointer return NULL for failure, functions returning -an integer return -1 (which could be a legal return value too!), and -other functions return 0 for success and -1 for failure. -Callers should always check for errors before using the result. If -an error was set, the caller must either explicitly clear it, or pass -the error on to its caller. - -Reference Counts ----------------- - -It takes a while to get used to the proper usage of reference counts. - -Functions that create an object set the reference count to 1; such new -objects must be stored somewhere or destroyed again with Py_DECREF(). -Some functions that 'store' objects, such as PyTuple_SetItem() and -PyList_SetItem(), -don't increment the reference count of the object, since the most -frequent use is to store a fresh object. Functions that 'retrieve' -objects, such as PyTuple_GetItem() and PyDict_GetItemString(), also -don't increment -the reference count, since most frequently the object is only looked at -quickly. Thus, to retrieve an object and store it again, the caller -must call Py_INCREF() explicitly. - -NOTE: functions that 'consume' a reference count, like -PyList_SetItem(), consume the reference even if the object wasn't -successfully stored, to simplify error handling. - -It seems attractive to make other functions that take an object as -argument consume a reference count; however, this may quickly get -confusing (even the current practice is already confusing). Consider -it carefully, it may save lots of calls to Py_INCREF() and Py_DECREF() at -times. -*/ - - -/* Trashcan mechanism, thanks to Christian Tismer. - -When deallocating a container object, it's possible to trigger an unbounded -chain of deallocations, as each Py_DECREF in turn drops the refcount on "the -next" object in the chain to 0. This can easily lead to stack faults, and -especially in threads (which typically have less stack space to work with). - -A container object that participates in cyclic gc can avoid this by -bracketing the body of its tp_dealloc function with a pair of macros: - -static void -mytype_dealloc(mytype *p) -{ - ... declarations go here ... - - PyObject_GC_UnTrack(p); // must untrack first - Py_TRASHCAN_SAFE_BEGIN(p) - ... The body of the deallocator goes here, including all calls ... - ... to Py_DECREF on contained objects. ... - Py_TRASHCAN_SAFE_END(p) -} - -CAUTION: Never return from the middle of the body! If the body needs to -"get out early", put a label immediately before the Py_TRASHCAN_SAFE_END -call, and goto it. Else the call-depth counter (see below) will stay -above 0 forever, and the trashcan will never get emptied. - -How it works: The BEGIN macro increments a call-depth counter. So long -as this counter is small, the body of the deallocator is run directly without -further ado. But if the counter gets large, it instead adds p to a list of -objects to be deallocated later, skips the body of the deallocator, and -resumes execution after the END macro. The tp_dealloc routine then returns -without deallocating anything (and so unbounded call-stack depth is avoided). - -When the call stack finishes unwinding again, code generated by the END macro -notices this, and calls another routine to deallocate all the objects that -may have been added to the list of deferred deallocations. In effect, a -chain of N deallocations is broken into N / PyTrash_UNWIND_LEVEL pieces, -with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL. -*/ - -/* This is the old private API, invoked by the macros before 2.7.4. - Kept for binary compatibility of extensions. */ -PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*); -PyAPI_FUNC(void) _PyTrash_destroy_chain(void); -PyAPI_DATA(int) _PyTrash_delete_nesting; -PyAPI_DATA(PyObject *) _PyTrash_delete_later; - -/* The new thread-safe private API, invoked by the macros below. */ -PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyObject*); -PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(void); - -#define PyTrash_UNWIND_LEVEL 50 - -/* Note the workaround for when the thread state is NULL (issue #17703) */ -#define Py_TRASHCAN_SAFE_BEGIN(op) \ - do { \ - PyThreadState *_tstate = PyThreadState_GET(); \ - if (!_tstate || \ - _tstate->trash_delete_nesting < PyTrash_UNWIND_LEVEL) { \ - if (_tstate) \ - ++_tstate->trash_delete_nesting; - /* The body of the deallocator is here. */ -#define Py_TRASHCAN_SAFE_END(op) \ - if (_tstate) { \ - --_tstate->trash_delete_nesting; \ - if (_tstate->trash_delete_later \ - && _tstate->trash_delete_nesting <= 0) \ - _PyTrash_thread_destroy_chain(); \ - } \ - } \ - else \ - _PyTrash_thread_deposit_object((PyObject*)op); \ - } while (0); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_OBJECT_H */
--- a/test/include/python2.7/objimpl.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,354 +0,0 @@ -/* The PyObject_ memory family: high-level object memory interfaces. - See pymem.h for the low-level PyMem_ family. -*/ - -#ifndef Py_OBJIMPL_H -#define Py_OBJIMPL_H - -#include "pymem.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* BEWARE: - - Each interface exports both functions and macros. Extension modules should - use the functions, to ensure binary compatibility across Python versions. - Because the Python implementation is free to change internal details, and - the macros may (or may not) expose details for speed, if you do use the - macros you must recompile your extensions with each Python release. - - Never mix calls to PyObject_ memory functions with calls to the platform - malloc/realloc/ calloc/free, or with calls to PyMem_. -*/ - -/* -Functions and macros for modules that implement new object types. - - - PyObject_New(type, typeobj) allocates memory for a new object of the given - type, and initializes part of it. 'type' must be the C structure type used - to represent the object, and 'typeobj' the address of the corresponding - type object. Reference count and type pointer are filled in; the rest of - the bytes of the object are *undefined*! The resulting expression type is - 'type *'. The size of the object is determined by the tp_basicsize field - of the type object. - - - PyObject_NewVar(type, typeobj, n) is similar but allocates a variable-size - object with room for n items. In addition to the refcount and type pointer - fields, this also fills in the ob_size field. - - - PyObject_Del(op) releases the memory allocated for an object. It does not - run a destructor -- it only frees the memory. PyObject_Free is identical. - - - PyObject_Init(op, typeobj) and PyObject_InitVar(op, typeobj, n) don't - allocate memory. Instead of a 'type' parameter, they take a pointer to a - new object (allocated by an arbitrary allocator), and initialize its object - header fields. - -Note that objects created with PyObject_{New, NewVar} are allocated using the -specialized Python allocator (implemented in obmalloc.c), if WITH_PYMALLOC is -enabled. In addition, a special debugging allocator is used if PYMALLOC_DEBUG -is also #defined. - -In case a specific form of memory management is needed (for example, if you -must use the platform malloc heap(s), or shared memory, or C++ local storage or -operator new), you must first allocate the object with your custom allocator, -then pass its pointer to PyObject_{Init, InitVar} for filling in its Python- -specific fields: reference count, type pointer, possibly others. You should -be aware that Python no control over these objects because they don't -cooperate with the Python memory manager. Such objects may not be eligible -for automatic garbage collection and you have to make sure that they are -released accordingly whenever their destructor gets called (cf. the specific -form of memory management you're using). - -Unless you have specific memory management requirements, use -PyObject_{New, NewVar, Del}. -*/ - -/* - * Raw object memory interface - * =========================== - */ - -/* Functions to call the same malloc/realloc/free as used by Python's - object allocator. If WITH_PYMALLOC is enabled, these may differ from - the platform malloc/realloc/free. The Python object allocator is - designed for fast, cache-conscious allocation of many "small" objects, - and with low hidden memory overhead. - - PyObject_Malloc(0) returns a unique non-NULL pointer if possible. - - PyObject_Realloc(NULL, n) acts like PyObject_Malloc(n). - PyObject_Realloc(p != NULL, 0) does not return NULL, or free the memory - at p. - - Returned pointers must be checked for NULL explicitly; no action is - performed on failure other than to return NULL (no warning it printed, no - exception is set, etc). - - For allocating objects, use PyObject_{New, NewVar} instead whenever - possible. The PyObject_{Malloc, Realloc, Free} family is exposed - so that you can exploit Python's small-block allocator for non-object - uses. If you must use these routines to allocate object memory, make sure - the object gets initialized via PyObject_{Init, InitVar} after obtaining - the raw memory. -*/ -PyAPI_FUNC(void *) PyObject_Malloc(size_t); -PyAPI_FUNC(void *) PyObject_Realloc(void *, size_t); -PyAPI_FUNC(void) PyObject_Free(void *); - - -/* Macros */ -#ifdef WITH_PYMALLOC -#ifdef PYMALLOC_DEBUG /* WITH_PYMALLOC && PYMALLOC_DEBUG */ -PyAPI_FUNC(void *) _PyObject_DebugMalloc(size_t nbytes); -PyAPI_FUNC(void *) _PyObject_DebugRealloc(void *p, size_t nbytes); -PyAPI_FUNC(void) _PyObject_DebugFree(void *p); -PyAPI_FUNC(void) _PyObject_DebugDumpAddress(const void *p); -PyAPI_FUNC(void) _PyObject_DebugCheckAddress(const void *p); -PyAPI_FUNC(void) _PyObject_DebugMallocStats(void); -PyAPI_FUNC(void *) _PyObject_DebugMallocApi(char api, size_t nbytes); -PyAPI_FUNC(void *) _PyObject_DebugReallocApi(char api, void *p, size_t nbytes); -PyAPI_FUNC(void) _PyObject_DebugFreeApi(char api, void *p); -PyAPI_FUNC(void) _PyObject_DebugCheckAddressApi(char api, const void *p); -PyAPI_FUNC(void *) _PyMem_DebugMalloc(size_t nbytes); -PyAPI_FUNC(void *) _PyMem_DebugRealloc(void *p, size_t nbytes); -PyAPI_FUNC(void) _PyMem_DebugFree(void *p); -#define PyObject_MALLOC _PyObject_DebugMalloc -#define PyObject_Malloc _PyObject_DebugMalloc -#define PyObject_REALLOC _PyObject_DebugRealloc -#define PyObject_Realloc _PyObject_DebugRealloc -#define PyObject_FREE _PyObject_DebugFree -#define PyObject_Free _PyObject_DebugFree - -#else /* WITH_PYMALLOC && ! PYMALLOC_DEBUG */ -#define PyObject_MALLOC PyObject_Malloc -#define PyObject_REALLOC PyObject_Realloc -#define PyObject_FREE PyObject_Free -#endif - -#else /* ! WITH_PYMALLOC */ -#define PyObject_MALLOC PyMem_MALLOC -#define PyObject_REALLOC PyMem_REALLOC -#define PyObject_FREE PyMem_FREE - -#endif /* WITH_PYMALLOC */ - -#define PyObject_Del PyObject_Free -#define PyObject_DEL PyObject_FREE - -/* for source compatibility with 2.2 */ -#define _PyObject_Del PyObject_Free - -/* - * Generic object allocator interface - * ================================== - */ - -/* Functions */ -PyAPI_FUNC(PyObject *) PyObject_Init(PyObject *, PyTypeObject *); -PyAPI_FUNC(PyVarObject *) PyObject_InitVar(PyVarObject *, - PyTypeObject *, Py_ssize_t); -PyAPI_FUNC(PyObject *) _PyObject_New(PyTypeObject *); -PyAPI_FUNC(PyVarObject *) _PyObject_NewVar(PyTypeObject *, Py_ssize_t); - -#define PyObject_New(type, typeobj) \ - ( (type *) _PyObject_New(typeobj) ) -#define PyObject_NewVar(type, typeobj, n) \ - ( (type *) _PyObject_NewVar((typeobj), (n)) ) - -/* Macros trading binary compatibility for speed. See also pymem.h. - Note that these macros expect non-NULL object pointers.*/ -#define PyObject_INIT(op, typeobj) \ - ( Py_TYPE(op) = (typeobj), _Py_NewReference((PyObject *)(op)), (op) ) -#define PyObject_INIT_VAR(op, typeobj, size) \ - ( Py_SIZE(op) = (size), PyObject_INIT((op), (typeobj)) ) - -#define _PyObject_SIZE(typeobj) ( (typeobj)->tp_basicsize ) - -/* _PyObject_VAR_SIZE returns the number of bytes (as size_t) allocated for a - vrbl-size object with nitems items, exclusive of gc overhead (if any). The - value is rounded up to the closest multiple of sizeof(void *), in order to - ensure that pointer fields at the end of the object are correctly aligned - for the platform (this is of special importance for subclasses of, e.g., - str or long, so that pointers can be stored after the embedded data). - - Note that there's no memory wastage in doing this, as malloc has to - return (at worst) pointer-aligned memory anyway. -*/ -#if ((SIZEOF_VOID_P - 1) & SIZEOF_VOID_P) != 0 -# error "_PyObject_VAR_SIZE requires SIZEOF_VOID_P be a power of 2" -#endif - -#define _PyObject_VAR_SIZE(typeobj, nitems) \ - (size_t) \ - ( ( (typeobj)->tp_basicsize + \ - (nitems)*(typeobj)->tp_itemsize + \ - (SIZEOF_VOID_P - 1) \ - ) & ~(SIZEOF_VOID_P - 1) \ - ) - -#define PyObject_NEW(type, typeobj) \ -( (type *) PyObject_Init( \ - (PyObject *) PyObject_MALLOC( _PyObject_SIZE(typeobj) ), (typeobj)) ) - -#define PyObject_NEW_VAR(type, typeobj, n) \ -( (type *) PyObject_InitVar( \ - (PyVarObject *) PyObject_MALLOC(_PyObject_VAR_SIZE((typeobj),(n)) ),\ - (typeobj), (n)) ) - -/* This example code implements an object constructor with a custom - allocator, where PyObject_New is inlined, and shows the important - distinction between two steps (at least): - 1) the actual allocation of the object storage; - 2) the initialization of the Python specific fields - in this storage with PyObject_{Init, InitVar}. - - PyObject * - YourObject_New(...) - { - PyObject *op; - - op = (PyObject *) Your_Allocator(_PyObject_SIZE(YourTypeStruct)); - if (op == NULL) - return PyErr_NoMemory(); - - PyObject_Init(op, &YourTypeStruct); - - op->ob_field = value; - ... - return op; - } - - Note that in C++, the use of the new operator usually implies that - the 1st step is performed automatically for you, so in a C++ class - constructor you would start directly with PyObject_Init/InitVar -*/ - -/* - * Garbage Collection Support - * ========================== - */ - -/* C equivalent of gc.collect(). */ -PyAPI_FUNC(Py_ssize_t) PyGC_Collect(void); - -/* Test if a type has a GC head */ -#define PyType_IS_GC(t) PyType_HasFeature((t), Py_TPFLAGS_HAVE_GC) - -/* Test if an object has a GC head */ -#define PyObject_IS_GC(o) (PyType_IS_GC(Py_TYPE(o)) && \ - (Py_TYPE(o)->tp_is_gc == NULL || Py_TYPE(o)->tp_is_gc(o))) - -PyAPI_FUNC(PyVarObject *) _PyObject_GC_Resize(PyVarObject *, Py_ssize_t); -#define PyObject_GC_Resize(type, op, n) \ - ( (type *) _PyObject_GC_Resize((PyVarObject *)(op), (n)) ) - -/* for source compatibility with 2.2 */ -#define _PyObject_GC_Del PyObject_GC_Del - -/* GC information is stored BEFORE the object structure. */ -typedef union _gc_head { - struct { - union _gc_head *gc_next; - union _gc_head *gc_prev; - Py_ssize_t gc_refs; - } gc; - long double dummy; /* force worst-case alignment */ -} PyGC_Head; - -extern PyGC_Head *_PyGC_generation0; - -#define _Py_AS_GC(o) ((PyGC_Head *)(o)-1) - -#define _PyGC_REFS_UNTRACKED (-2) -#define _PyGC_REFS_REACHABLE (-3) -#define _PyGC_REFS_TENTATIVELY_UNREACHABLE (-4) - -/* Tell the GC to track this object. NB: While the object is tracked the - * collector it must be safe to call the ob_traverse method. */ -#define _PyObject_GC_TRACK(o) do { \ - PyGC_Head *g = _Py_AS_GC(o); \ - if (g->gc.gc_refs != _PyGC_REFS_UNTRACKED) \ - Py_FatalError("GC object already tracked"); \ - g->gc.gc_refs = _PyGC_REFS_REACHABLE; \ - g->gc.gc_next = _PyGC_generation0; \ - g->gc.gc_prev = _PyGC_generation0->gc.gc_prev; \ - g->gc.gc_prev->gc.gc_next = g; \ - _PyGC_generation0->gc.gc_prev = g; \ - } while (0); - -/* Tell the GC to stop tracking this object. - * gc_next doesn't need to be set to NULL, but doing so is a good - * way to provoke memory errors if calling code is confused. - */ -#define _PyObject_GC_UNTRACK(o) do { \ - PyGC_Head *g = _Py_AS_GC(o); \ - assert(g->gc.gc_refs != _PyGC_REFS_UNTRACKED); \ - g->gc.gc_refs = _PyGC_REFS_UNTRACKED; \ - g->gc.gc_prev->gc.gc_next = g->gc.gc_next; \ - g->gc.gc_next->gc.gc_prev = g->gc.gc_prev; \ - g->gc.gc_next = NULL; \ - } while (0); - -/* True if the object is currently tracked by the GC. */ -#define _PyObject_GC_IS_TRACKED(o) \ - ((_Py_AS_GC(o))->gc.gc_refs != _PyGC_REFS_UNTRACKED) - -/* True if the object may be tracked by the GC in the future, or already is. - This can be useful to implement some optimizations. */ -#define _PyObject_GC_MAY_BE_TRACKED(obj) \ - (PyObject_IS_GC(obj) && \ - (!PyTuple_CheckExact(obj) || _PyObject_GC_IS_TRACKED(obj))) - - -PyAPI_FUNC(PyObject *) _PyObject_GC_Malloc(size_t); -PyAPI_FUNC(PyObject *) _PyObject_GC_New(PyTypeObject *); -PyAPI_FUNC(PyVarObject *) _PyObject_GC_NewVar(PyTypeObject *, Py_ssize_t); -PyAPI_FUNC(void) PyObject_GC_Track(void *); -PyAPI_FUNC(void) PyObject_GC_UnTrack(void *); -PyAPI_FUNC(void) PyObject_GC_Del(void *); - -#define PyObject_GC_New(type, typeobj) \ - ( (type *) _PyObject_GC_New(typeobj) ) -#define PyObject_GC_NewVar(type, typeobj, n) \ - ( (type *) _PyObject_GC_NewVar((typeobj), (n)) ) - - -/* Utility macro to help write tp_traverse functions. - * To use this macro, the tp_traverse function must name its arguments - * "visit" and "arg". This is intended to keep tp_traverse functions - * looking as much alike as possible. - */ -#define Py_VISIT(op) \ - do { \ - if (op) { \ - int vret = visit((PyObject *)(op), arg); \ - if (vret) \ - return vret; \ - } \ - } while (0) - -/* This is here for the sake of backwards compatibility. Extensions that - * use the old GC API will still compile but the objects will not be - * tracked by the GC. */ -#define PyGC_HEAD_SIZE 0 -#define PyObject_GC_Init(op) -#define PyObject_GC_Fini(op) -#define PyObject_AS_GC(op) (op) -#define PyObject_FROM_GC(op) (op) - - -/* Test if a type supports weak references */ -#define PyType_SUPPORTS_WEAKREFS(t) \ - (PyType_HasFeature((t), Py_TPFLAGS_HAVE_WEAKREFS) \ - && ((t)->tp_weaklistoffset > 0)) - -#define PyObject_GET_WEAKREFS_LISTPTR(o) \ - ((PyObject **) (((char *) (o)) + Py_TYPE(o)->tp_weaklistoffset)) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_OBJIMPL_H */
--- a/test/include/python2.7/opcode.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,171 +0,0 @@ -#ifndef Py_OPCODE_H -#define Py_OPCODE_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* Instruction opcodes for compiled code */ - -#define STOP_CODE 0 -#define POP_TOP 1 -#define ROT_TWO 2 -#define ROT_THREE 3 -#define DUP_TOP 4 -#define ROT_FOUR 5 -#define NOP 9 - -#define UNARY_POSITIVE 10 -#define UNARY_NEGATIVE 11 -#define UNARY_NOT 12 -#define UNARY_CONVERT 13 - -#define UNARY_INVERT 15 - -#define BINARY_POWER 19 - -#define BINARY_MULTIPLY 20 -#define BINARY_DIVIDE 21 -#define BINARY_MODULO 22 -#define BINARY_ADD 23 -#define BINARY_SUBTRACT 24 -#define BINARY_SUBSCR 25 -#define BINARY_FLOOR_DIVIDE 26 -#define BINARY_TRUE_DIVIDE 27 -#define INPLACE_FLOOR_DIVIDE 28 -#define INPLACE_TRUE_DIVIDE 29 - -#define SLICE 30 -/* Also uses 31-33 */ -#define SLICE_1 31 -#define SLICE_2 32 -#define SLICE_3 33 - -#define STORE_SLICE 40 -/* Also uses 41-43 */ -#define STORE_SLICE_1 41 -#define STORE_SLICE_2 42 -#define STORE_SLICE_3 43 - -#define DELETE_SLICE 50 -/* Also uses 51-53 */ -#define DELETE_SLICE_1 51 -#define DELETE_SLICE_2 52 -#define DELETE_SLICE_3 53 - -#define STORE_MAP 54 -#define INPLACE_ADD 55 -#define INPLACE_SUBTRACT 56 -#define INPLACE_MULTIPLY 57 -#define INPLACE_DIVIDE 58 -#define INPLACE_MODULO 59 -#define STORE_SUBSCR 60 -#define DELETE_SUBSCR 61 - -#define BINARY_LSHIFT 62 -#define BINARY_RSHIFT 63 -#define BINARY_AND 64 -#define BINARY_XOR 65 -#define BINARY_OR 66 -#define INPLACE_POWER 67 -#define GET_ITER 68 - -#define PRINT_EXPR 70 -#define PRINT_ITEM 71 -#define PRINT_NEWLINE 72 -#define PRINT_ITEM_TO 73 -#define PRINT_NEWLINE_TO 74 -#define INPLACE_LSHIFT 75 -#define INPLACE_RSHIFT 76 -#define INPLACE_AND 77 -#define INPLACE_XOR 78 -#define INPLACE_OR 79 -#define BREAK_LOOP 80 -#define WITH_CLEANUP 81 -#define LOAD_LOCALS 82 -#define RETURN_VALUE 83 -#define IMPORT_STAR 84 -#define EXEC_STMT 85 -#define YIELD_VALUE 86 -#define POP_BLOCK 87 -#define END_FINALLY 88 -#define BUILD_CLASS 89 - -#define HAVE_ARGUMENT 90 /* Opcodes from here have an argument: */ - -#define STORE_NAME 90 /* Index in name list */ -#define DELETE_NAME 91 /* "" */ -#define UNPACK_SEQUENCE 92 /* Number of sequence items */ -#define FOR_ITER 93 -#define LIST_APPEND 94 - -#define STORE_ATTR 95 /* Index in name list */ -#define DELETE_ATTR 96 /* "" */ -#define STORE_GLOBAL 97 /* "" */ -#define DELETE_GLOBAL 98 /* "" */ -#define DUP_TOPX 99 /* number of items to duplicate */ -#define LOAD_CONST 100 /* Index in const list */ -#define LOAD_NAME 101 /* Index in name list */ -#define BUILD_TUPLE 102 /* Number of tuple items */ -#define BUILD_LIST 103 /* Number of list items */ -#define BUILD_SET 104 /* Number of set items */ -#define BUILD_MAP 105 /* Always zero for now */ -#define LOAD_ATTR 106 /* Index in name list */ -#define COMPARE_OP 107 /* Comparison operator */ -#define IMPORT_NAME 108 /* Index in name list */ -#define IMPORT_FROM 109 /* Index in name list */ -#define JUMP_FORWARD 110 /* Number of bytes to skip */ - -#define JUMP_IF_FALSE_OR_POP 111 /* Target byte offset from beginning - of code */ -#define JUMP_IF_TRUE_OR_POP 112 /* "" */ -#define JUMP_ABSOLUTE 113 /* "" */ -#define POP_JUMP_IF_FALSE 114 /* "" */ -#define POP_JUMP_IF_TRUE 115 /* "" */ - -#define LOAD_GLOBAL 116 /* Index in name list */ - -#define CONTINUE_LOOP 119 /* Start of loop (absolute) */ -#define SETUP_LOOP 120 /* Target address (relative) */ -#define SETUP_EXCEPT 121 /* "" */ -#define SETUP_FINALLY 122 /* "" */ - -#define LOAD_FAST 124 /* Local variable number */ -#define STORE_FAST 125 /* Local variable number */ -#define DELETE_FAST 126 /* Local variable number */ - -#define RAISE_VARARGS 130 /* Number of raise arguments (1, 2 or 3) */ -/* CALL_FUNCTION_XXX opcodes defined below depend on this definition */ -#define CALL_FUNCTION 131 /* #args + (#kwargs<<8) */ -#define MAKE_FUNCTION 132 /* #defaults */ -#define BUILD_SLICE 133 /* Number of items */ - -#define MAKE_CLOSURE 134 /* #free vars */ -#define LOAD_CLOSURE 135 /* Load free variable from closure */ -#define LOAD_DEREF 136 /* Load and dereference from closure cell */ -#define STORE_DEREF 137 /* Store into cell */ - -/* The next 3 opcodes must be contiguous and satisfy - (CALL_FUNCTION_VAR - CALL_FUNCTION) & 3 == 1 */ -#define CALL_FUNCTION_VAR 140 /* #args + (#kwargs<<8) */ -#define CALL_FUNCTION_KW 141 /* #args + (#kwargs<<8) */ -#define CALL_FUNCTION_VAR_KW 142 /* #args + (#kwargs<<8) */ - -#define SETUP_WITH 143 - -/* Support for opargs more than 16 bits long */ -#define EXTENDED_ARG 145 - -#define SET_ADD 146 -#define MAP_ADD 147 - - -enum cmp_op {PyCmp_LT=Py_LT, PyCmp_LE=Py_LE, PyCmp_EQ=Py_EQ, PyCmp_NE=Py_NE, PyCmp_GT=Py_GT, PyCmp_GE=Py_GE, - PyCmp_IN, PyCmp_NOT_IN, PyCmp_IS, PyCmp_IS_NOT, PyCmp_EXC_MATCH, PyCmp_BAD}; - -#define HAS_ARG(op) ((op) >= HAVE_ARGUMENT) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_OPCODE_H */
--- a/test/include/python2.7/osdefs.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ -#ifndef Py_OSDEFS_H -#define Py_OSDEFS_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* Operating system dependencies */ - -/* Mod by chrish: QNX has WATCOM, but isn't DOS */ -#if !defined(__QNX__) -#if defined(MS_WINDOWS) || defined(__BORLANDC__) || defined(__WATCOMC__) || defined(__DJGPP__) || defined(PYOS_OS2) -#if defined(PYOS_OS2) && defined(PYCC_GCC) -#define MAXPATHLEN 260 -#define SEP '/' -#define ALTSEP '\\' -#else -#define SEP '\\' -#define ALTSEP '/' -#define MAXPATHLEN 256 -#endif -#define DELIM ';' -#endif -#endif - -#ifdef RISCOS -#define SEP '.' -#define MAXPATHLEN 256 -#define DELIM ',' -#endif - - -/* Filename separator */ -#ifndef SEP -#define SEP '/' -#endif - -/* Max pathname length */ -#ifdef __hpux -#include <sys/param.h> -#include <limits.h> -#ifndef PATH_MAX -#define PATH_MAX MAXPATHLEN -#endif -#endif - -#ifndef MAXPATHLEN -#if defined(PATH_MAX) && PATH_MAX > 1024 -#define MAXPATHLEN PATH_MAX -#else -#define MAXPATHLEN 1024 -#endif -#endif - -/* Search path entry delimiter */ -#ifndef DELIM -#define DELIM ':' -#endif - -#ifdef __cplusplus -} -#endif -#endif /* !Py_OSDEFS_H */
--- a/test/include/python2.7/parsetok.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,64 +0,0 @@ - -/* Parser-tokenizer link interface */ - -#ifndef Py_PARSETOK_H -#define Py_PARSETOK_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct { - int error; - const char *filename; - int lineno; - int offset; - char *text; - int token; - int expected; -} perrdetail; - -#if 0 -#define PyPARSE_YIELD_IS_KEYWORD 0x0001 -#endif - -#define PyPARSE_DONT_IMPLY_DEDENT 0x0002 - -#if 0 -#define PyPARSE_WITH_IS_KEYWORD 0x0003 -#endif - -#define PyPARSE_PRINT_IS_FUNCTION 0x0004 -#define PyPARSE_UNICODE_LITERALS 0x0008 - - - -PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int, - perrdetail *); -PyAPI_FUNC(node *) PyParser_ParseFile (FILE *, const char *, grammar *, int, - char *, char *, perrdetail *); - -PyAPI_FUNC(node *) PyParser_ParseStringFlags(const char *, grammar *, int, - perrdetail *, int); -PyAPI_FUNC(node *) PyParser_ParseFileFlags(FILE *, const char *, grammar *, - int, char *, char *, - perrdetail *, int); -PyAPI_FUNC(node *) PyParser_ParseFileFlagsEx(FILE *, const char *, grammar *, - int, char *, char *, - perrdetail *, int *); - -PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilename(const char *, - const char *, - grammar *, int, - perrdetail *, int); -PyAPI_FUNC(node *) PyParser_ParseStringFlagsFilenameEx(const char *, - const char *, - grammar *, int, - perrdetail *, int *); - -/* Note that he following function is defined in pythonrun.c not parsetok.c. */ -PyAPI_FUNC(void) PyParser_SetError(perrdetail *); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_PARSETOK_H */
--- a/test/include/python2.7/patchlevel.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ - -/* Newfangled version identification scheme. - - This scheme was added in Python 1.5.2b2; before that time, only PATCHLEVEL - was available. To test for presence of the scheme, test for - defined(PY_MAJOR_VERSION). - - When the major or minor version changes, the VERSION variable in - configure.ac must also be changed. - - There is also (independent) API version information in modsupport.h. -*/ - -/* Values for PY_RELEASE_LEVEL */ -#define PY_RELEASE_LEVEL_ALPHA 0xA -#define PY_RELEASE_LEVEL_BETA 0xB -#define PY_RELEASE_LEVEL_GAMMA 0xC /* For release candidates */ -#define PY_RELEASE_LEVEL_FINAL 0xF /* Serial should be 0 here */ - /* Higher for patch releases */ - -/* Version parsed out into numeric values */ -/*--start constants--*/ -#define PY_MAJOR_VERSION 2 -#define PY_MINOR_VERSION 7 -#define PY_MICRO_VERSION 12 -#define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_FINAL -#define PY_RELEASE_SERIAL 0 - -/* Version as a string */ -#define PY_VERSION "2.7.12" -/*--end constants--*/ - -/* Subversion Revision number of this file (not of the repository). Empty - since Mercurial migration. */ -#define PY_PATCHLEVEL_REVISION "" - -/* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2. - Use this for numeric comparisons, e.g. #if PY_VERSION_HEX >= ... */ -#define PY_VERSION_HEX ((PY_MAJOR_VERSION << 24) | \ - (PY_MINOR_VERSION << 16) | \ - (PY_MICRO_VERSION << 8) | \ - (PY_RELEASE_LEVEL << 4) | \ - (PY_RELEASE_SERIAL << 0))
--- a/test/include/python2.7/pgen.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ -#ifndef Py_PGEN_H -#define Py_PGEN_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* Parser generator interface */ - -extern grammar *meta_grammar(void); - -struct _node; -extern grammar *pgen(struct _node *); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_PGEN_H */
--- a/test/include/python2.7/pgenheaders.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -#ifndef Py_PGENHEADERS_H -#define Py_PGENHEADERS_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* Include files and extern declarations used by most of the parser. */ - -#include "Python.h" - -PyAPI_FUNC(void) PySys_WriteStdout(const char *format, ...) - Py_GCC_ATTRIBUTE((format(printf, 1, 2))); -PyAPI_FUNC(void) PySys_WriteStderr(const char *format, ...) - Py_GCC_ATTRIBUTE((format(printf, 1, 2))); - -#define addarc _Py_addarc -#define addbit _Py_addbit -#define adddfa _Py_adddfa -#define addfirstsets _Py_addfirstsets -#define addlabel _Py_addlabel -#define addstate _Py_addstate -#define delbitset _Py_delbitset -#define dumptree _Py_dumptree -#define findlabel _Py_findlabel -#define mergebitset _Py_mergebitset -#define meta_grammar _Py_meta_grammar -#define newbitset _Py_newbitset -#define newgrammar _Py_newgrammar -#define pgen _Py_pgen -#define printgrammar _Py_printgrammar -#define printnonterminals _Py_printnonterminals -#define printtree _Py_printtree -#define samebitset _Py_samebitset -#define showtree _Py_showtree -#define tok_dump _Py_tok_dump -#define translatelabels _Py_translatelabels - -#ifdef __cplusplus -} -#endif -#endif /* !Py_PGENHEADERS_H */
--- a/test/include/python2.7/py_curses.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,176 +0,0 @@ - -#ifndef Py_CURSES_H -#define Py_CURSES_H - -#ifdef __APPLE__ -/* -** On Mac OS X 10.2 [n]curses.h and stdlib.h use different guards -** against multiple definition of wchar_t. -*/ -#ifdef _BSD_WCHAR_T_DEFINED_ -#define _WCHAR_T -#endif - -/* the following define is necessary for OS X 10.6; without it, the - Apple-supplied ncurses.h sets NCURSES_OPAQUE to 1, and then Python - can't get at the WINDOW flags field. */ -#define NCURSES_OPAQUE 0 -#endif /* __APPLE__ */ - -#ifdef __FreeBSD__ -/* -** On FreeBSD, [n]curses.h and stdlib.h/wchar.h use different guards -** against multiple definition of wchar_t and wint_t. -*/ -#ifdef _XOPEN_SOURCE_EXTENDED -#ifndef __FreeBSD_version -#include <osreldate.h> -#endif -#if __FreeBSD_version >= 500000 -#ifndef __wchar_t -#define __wchar_t -#endif -#ifndef __wint_t -#define __wint_t -#endif -#else -#ifndef _WCHAR_T -#define _WCHAR_T -#endif -#ifndef _WINT_T -#define _WINT_T -#endif -#endif -#endif -#endif - -#ifdef HAVE_NCURSES_H -#include <ncurses.h> -#else -#include <curses.h> -#ifdef HAVE_TERM_H -/* for tigetstr, which is not declared in SysV curses */ -#include <term.h> -#endif -#endif - -#ifdef HAVE_NCURSES_H -/* configure was checking <curses.h>, but we will - use <ncurses.h>, which has all these features. */ -#ifndef WINDOW_HAS_FLAGS -#define WINDOW_HAS_FLAGS 1 -#endif -#ifndef MVWDELCH_IS_EXPRESSION -#define MVWDELCH_IS_EXPRESSION 1 -#endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -#define PyCurses_API_pointers 4 - -/* Type declarations */ - -typedef struct { - PyObject_HEAD - WINDOW *win; -} PyCursesWindowObject; - -#define PyCursesWindow_Check(v) (Py_TYPE(v) == &PyCursesWindow_Type) - -#define PyCurses_CAPSULE_NAME "_curses._C_API" - - -#ifdef CURSES_MODULE -/* This section is used when compiling _cursesmodule.c */ - -#else -/* This section is used in modules that use the _cursesmodule API */ - -static void **PyCurses_API; - -#define PyCursesWindow_Type (*(PyTypeObject *) PyCurses_API[0]) -#define PyCursesSetupTermCalled {if (! ((int (*)(void))PyCurses_API[1]) () ) return NULL;} -#define PyCursesInitialised {if (! ((int (*)(void))PyCurses_API[2]) () ) return NULL;} -#define PyCursesInitialisedColor {if (! ((int (*)(void))PyCurses_API[3]) () ) return NULL;} - -#define import_curses() \ - PyCurses_API = (void **)PyCapsule_Import(PyCurses_CAPSULE_NAME, 1); - -#endif - -/* general error messages */ -static char *catchall_ERR = "curses function returned ERR"; -static char *catchall_NULL = "curses function returned NULL"; - -/* Function Prototype Macros - They are ugly but very, very useful. ;-) - - X - function name - TYPE - parameter Type - ERGSTR - format string for construction of the return value - PARSESTR - format string for argument parsing - */ - -#define NoArgNoReturnFunction(X) \ -static PyObject *PyCurses_ ## X (PyObject *self) \ -{ \ - PyCursesInitialised \ - return PyCursesCheckERR(X(), # X); } - -#define NoArgOrFlagNoReturnFunction(X) \ -static PyObject *PyCurses_ ## X (PyObject *self, PyObject *args) \ -{ \ - int flag = 0; \ - PyCursesInitialised \ - switch(PyTuple_Size(args)) { \ - case 0: \ - return PyCursesCheckERR(X(), # X); \ - case 1: \ - if (!PyArg_ParseTuple(args, "i;True(1) or False(0)", &flag)) return NULL; \ - if (flag) return PyCursesCheckERR(X(), # X); \ - else return PyCursesCheckERR(no ## X (), # X); \ - default: \ - PyErr_SetString(PyExc_TypeError, # X " requires 0 or 1 arguments"); \ - return NULL; } } - -#define NoArgReturnIntFunction(X) \ -static PyObject *PyCurses_ ## X (PyObject *self) \ -{ \ - PyCursesInitialised \ - return PyInt_FromLong((long) X()); } - - -#define NoArgReturnStringFunction(X) \ -static PyObject *PyCurses_ ## X (PyObject *self) \ -{ \ - PyCursesInitialised \ - return PyString_FromString(X()); } - -#define NoArgTrueFalseFunction(X) \ -static PyObject *PyCurses_ ## X (PyObject *self) \ -{ \ - PyCursesInitialised \ - if (X () == FALSE) { \ - Py_INCREF(Py_False); \ - return Py_False; \ - } \ - Py_INCREF(Py_True); \ - return Py_True; } - -#define NoArgNoReturnVoidFunction(X) \ -static PyObject *PyCurses_ ## X (PyObject *self) \ -{ \ - PyCursesInitialised \ - X(); \ - Py_INCREF(Py_None); \ - return Py_None; } - -#ifdef __cplusplus -} -#endif - -#endif /* !defined(Py_CURSES_H) */ - -
--- a/test/include/python2.7/pyarena.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -/* An arena-like memory interface for the compiler. - */ - -#ifndef Py_PYARENA_H -#define Py_PYARENA_H - -#ifdef __cplusplus -extern "C" { -#endif - - typedef struct _arena PyArena; - - /* PyArena_New() and PyArena_Free() create a new arena and free it, - respectively. Once an arena has been created, it can be used - to allocate memory via PyArena_Malloc(). Pointers to PyObject can - also be registered with the arena via PyArena_AddPyObject(), and the - arena will ensure that the PyObjects stay alive at least until - PyArena_Free() is called. When an arena is freed, all the memory it - allocated is freed, the arena releases internal references to registered - PyObject*, and none of its pointers are valid. - XXX (tim) What does "none of its pointers are valid" mean? Does it - XXX mean that pointers previously obtained via PyArena_Malloc() are - XXX no longer valid? (That's clearly true, but not sure that's what - XXX the text is trying to say.) - - PyArena_New() returns an arena pointer. On error, it - returns a negative number and sets an exception. - XXX (tim): Not true. On error, PyArena_New() actually returns NULL, - XXX and looks like it may or may not set an exception (e.g., if the - XXX internal PyList_New(0) returns NULL, PyArena_New() passes that on - XXX and an exception is set; OTOH, if the internal - XXX block_new(DEFAULT_BLOCK_SIZE) returns NULL, that's passed on but - XXX an exception is not set in that case). - */ - PyAPI_FUNC(PyArena *) PyArena_New(void); - PyAPI_FUNC(void) PyArena_Free(PyArena *); - - /* Mostly like malloc(), return the address of a block of memory spanning - * `size` bytes, or return NULL (without setting an exception) if enough - * new memory can't be obtained. Unlike malloc(0), PyArena_Malloc() with - * size=0 does not guarantee to return a unique pointer (the pointer - * returned may equal one or more other pointers obtained from - * PyArena_Malloc()). - * Note that pointers obtained via PyArena_Malloc() must never be passed to - * the system free() or realloc(), or to any of Python's similar memory- - * management functions. PyArena_Malloc()-obtained pointers remain valid - * until PyArena_Free(ar) is called, at which point all pointers obtained - * from the arena `ar` become invalid simultaneously. - */ - PyAPI_FUNC(void *) PyArena_Malloc(PyArena *, size_t size); - - /* This routine isn't a proper arena allocation routine. It takes - * a PyObject* and records it so that it can be DECREFed when the - * arena is freed. - */ - PyAPI_FUNC(int) PyArena_AddPyObject(PyArena *, PyObject *); - -#ifdef __cplusplus -} -#endif - -#endif /* !Py_PYARENA_H */
--- a/test/include/python2.7/pycapsule.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,56 +0,0 @@ - -/* Capsule objects let you wrap a C "void *" pointer in a Python - object. They're a way of passing data through the Python interpreter - without creating your own custom type. - - Capsules are used for communication between extension modules. - They provide a way for an extension module to export a C interface - to other extension modules, so that extension modules can use the - Python import mechanism to link to one another. - - For more information, please see "c-api/capsule.html" in the - documentation. -*/ - -#ifndef Py_CAPSULE_H -#define Py_CAPSULE_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_DATA(PyTypeObject) PyCapsule_Type; - -typedef void (*PyCapsule_Destructor)(PyObject *); - -#define PyCapsule_CheckExact(op) (Py_TYPE(op) == &PyCapsule_Type) - - -PyAPI_FUNC(PyObject *) PyCapsule_New( - void *pointer, - const char *name, - PyCapsule_Destructor destructor); - -PyAPI_FUNC(void *) PyCapsule_GetPointer(PyObject *capsule, const char *name); - -PyAPI_FUNC(PyCapsule_Destructor) PyCapsule_GetDestructor(PyObject *capsule); - -PyAPI_FUNC(const char *) PyCapsule_GetName(PyObject *capsule); - -PyAPI_FUNC(void *) PyCapsule_GetContext(PyObject *capsule); - -PyAPI_FUNC(int) PyCapsule_IsValid(PyObject *capsule, const char *name); - -PyAPI_FUNC(int) PyCapsule_SetPointer(PyObject *capsule, void *pointer); - -PyAPI_FUNC(int) PyCapsule_SetDestructor(PyObject *capsule, PyCapsule_Destructor destructor); - -PyAPI_FUNC(int) PyCapsule_SetName(PyObject *capsule, const char *name); - -PyAPI_FUNC(int) PyCapsule_SetContext(PyObject *capsule, void *context); - -PyAPI_FUNC(void *) PyCapsule_Import(const char *name, int no_block); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_CAPSULE_H */
--- a/test/include/python2.7/pyconfig.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,97 +0,0 @@ -#if defined(__linux__) -# if defined(__x86_64__) && defined(__LP64__) -# include <x86_64-linux-gnu/python2.7/pyconfig.h> -# elif defined(__x86_64__) && defined(__ILP32__) -# include <x86_64-linux-gnux32/python2.7/pyconfig.h> -# elif defined(__i386__) -# include <i386-linux-gnu/python2.7/pyconfig.h> -# elif defined(__aarch64__) && defined(__AARCH64EL__) -# if defined(__ILP32__) -# include <aarch64_ilp32-linux-gnu/python2.7/pyconfig.h> -# else -# include <aarch64-linux-gnu/python2.7/pyconfig.h> -# endif -# elif defined(__aarch64__) && defined(__AARCH64EB__) -# if defined(__ILP32__) -# include <aarch64_be_ilp32-linux-gnu/python2.7/pyconfig.h> -# else -# include <aarch64_be-linux-gnu/python2.7/pyconfig.h> -# endif -# elif defined(__alpha__) -# include <alpha-linux-gnu/python2.7/pyconfig.h> -# elif defined(__ARM_EABI__) && defined(__ARM_PCS_VFP) -# if defined(__ARMEL__) -# include <arm-linux-gnueabihf/python2.7/pyconfig.h> -# else -# include <armeb-linux-gnueabihf/python2.7/pyconfig.h> -# endif -# elif defined(__ARM_EABI__) && !defined(__ARM_PCS_VFP) -# if defined(__ARMEL__) -# include <arm-linux-gnueabi/python2.7/pyconfig.h> -# else -# include <armeb-linux-gnueabi/python2.7/pyconfig.h> -# endif -# elif defined(__hppa__) -# include <hppa-linux-gnu/python2.7/pyconfig.h> -# elif defined(__ia64__) -# include <ia64-linux-gnu/python2.7/pyconfig.h> -# elif defined(__m68k__) && !defined(__mcoldfire__) -# include <m68k-linux-gnu/python2.7/pyconfig.h> -# elif defined(__mips_hard_float) && defined(_MIPSEL) -# if _MIPS_SIM == _ABIO32 -# include <mipsel-linux-gnu/python2.7/pyconfig.h> -# elif _MIPS_SIM == _ABIN32 -# include <mips64el-linux-gnuabin32/python2.7/pyconfig.h> -# elif _MIPS_SIM == _ABI64 -# include <mips64el-linux-gnuabi64/python2.7/pyconfig.h> -# else -# error unknown multiarch location for pyconfig.h -# endif -# elif defined(__mips_hard_float) -# if _MIPS_SIM == _ABIO32 -# include <mips-linux-gnu/python2.7/pyconfig.h> -# elif _MIPS_SIM == _ABIN32 -# include <mips64-linux-gnuabin32/python2.7/pyconfig.h> -# elif _MIPS_SIM == _ABI64 -# include <mips64-linux-gnuabi64/python2.7/pyconfig.h> -# else -# error unknown multiarch location for pyconfig.h -# endif -# elif defined(__or1k__) -# include <or1k-linux-gnu/python2.7/pyconfig.h> -# elif defined(__powerpc__) && defined(__SPE__) -# include <powerpc-linux-gnuspe/python2.7/pyconfig.h> -# elif defined(__powerpc64__) -# if defined(__LITTLE_ENDIAN__) -# include <powerpc64le-linux-gnu/python2.7/pyconfig.h> -# else -# include <powerpc64-linux-gnu/python2.7/pyconfig.h> -# endif -# elif defined(__powerpc__) -# include <powerpc-linux-gnu/python2.7/pyconfig.h> -# elif defined(__s390x__) -# include <s390x-linux-gnu/python2.7/pyconfig.h> -# elif defined(__s390__) -# include <s390-linux-gnu/python2.7/pyconfig.h> -# elif defined(__sh__) && defined(__LITTLE_ENDIAN__) -# include <sh4-linux-gnu/python2.7/pyconfig.h> -# elif defined(__sparc__) && defined(__arch64__) -# include <sparc64-linux-gnu/python2.7/pyconfig.h> -# elif defined(__sparc__) -# include <sparc-linux-gnu/python2.7/pyconfig.h> -# else -# error unknown multiarch location for pyconfig.h -# endif -#elif defined(__FreeBSD_kernel__) -# if defined(__LP64__) -# include <x86_64-kfreebsd-gnu/python2.7/pyconfig.h> -# elif defined(__i386__) -# include <i386-kfreebsd-gnu/python2.7/pyconfig.h> -# else -# error unknown multiarch location for pyconfig.h -# endif -#elif defined(__gnu_hurd__) -# include <i386-gnu/python2.7/pyconfig.h> -#else -# error unknown multiarch location for pyconfig.h -#endif
--- a/test/include/python2.7/pyctype.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#ifndef PYCTYPE_H -#define PYCTYPE_H - -#define PY_CTF_LOWER 0x01 -#define PY_CTF_UPPER 0x02 -#define PY_CTF_ALPHA (PY_CTF_LOWER|PY_CTF_UPPER) -#define PY_CTF_DIGIT 0x04 -#define PY_CTF_ALNUM (PY_CTF_ALPHA|PY_CTF_DIGIT) -#define PY_CTF_SPACE 0x08 -#define PY_CTF_XDIGIT 0x10 - -PyAPI_DATA(const unsigned int) _Py_ctype_table[256]; - -/* Unlike their C counterparts, the following macros are not meant to - * handle an int with any of the values [EOF, 0-UCHAR_MAX]. The argument - * must be a signed/unsigned char. */ -#define Py_ISLOWER(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_LOWER) -#define Py_ISUPPER(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_UPPER) -#define Py_ISALPHA(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALPHA) -#define Py_ISDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_DIGIT) -#define Py_ISXDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_XDIGIT) -#define Py_ISALNUM(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALNUM) -#define Py_ISSPACE(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_SPACE) - -PyAPI_DATA(const unsigned char) _Py_ctype_tolower[256]; -PyAPI_DATA(const unsigned char) _Py_ctype_toupper[256]; - -#define Py_TOLOWER(c) (_Py_ctype_tolower[Py_CHARMASK(c)]) -#define Py_TOUPPER(c) (_Py_ctype_toupper[Py_CHARMASK(c)]) - -#endif /* !PYCTYPE_H */
--- a/test/include/python2.7/pydebug.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ - -#ifndef Py_PYDEBUG_H -#define Py_PYDEBUG_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_DATA(int) Py_DebugFlag; -PyAPI_DATA(int) Py_VerboseFlag; -PyAPI_DATA(int) Py_InteractiveFlag; -PyAPI_DATA(int) Py_InspectFlag; -PyAPI_DATA(int) Py_OptimizeFlag; -PyAPI_DATA(int) Py_NoSiteFlag; -PyAPI_DATA(int) Py_BytesWarningFlag; -PyAPI_DATA(int) Py_UseClassExceptionsFlag; -PyAPI_DATA(int) Py_FrozenFlag; -PyAPI_DATA(int) Py_TabcheckFlag; -PyAPI_DATA(int) Py_UnicodeFlag; -PyAPI_DATA(int) Py_IgnoreEnvironmentFlag; -PyAPI_DATA(int) Py_DivisionWarningFlag; -PyAPI_DATA(int) Py_DontWriteBytecodeFlag; -PyAPI_DATA(int) Py_NoUserSiteDirectory; -/* _XXX Py_QnewFlag should go away in 3.0. It's true iff -Qnew is passed, - on the command line, and is used in 2.2 by ceval.c to make all "/" divisions - true divisions (which they will be in 3.0). */ -PyAPI_DATA(int) _Py_QnewFlag; -/* Warn about 3.x issues */ -PyAPI_DATA(int) Py_Py3kWarningFlag; -PyAPI_DATA(int) Py_HashRandomizationFlag; - -/* this is a wrapper around getenv() that pays attention to - Py_IgnoreEnvironmentFlag. It should be used for getting variables like - PYTHONPATH and PYTHONHOME from the environment */ -#define Py_GETENV(s) (Py_IgnoreEnvironmentFlag ? NULL : getenv(s)) - -PyAPI_FUNC(void) Py_FatalError(const char *message); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_PYDEBUG_H */
--- a/test/include/python2.7/pyerrors.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,329 +0,0 @@ -#ifndef Py_ERRORS_H -#define Py_ERRORS_H -#ifdef __cplusplus -extern "C" { -#endif - -/* Error objects */ - -typedef struct { - PyObject_HEAD - PyObject *dict; - PyObject *args; - PyObject *message; -} PyBaseExceptionObject; - -typedef struct { - PyObject_HEAD - PyObject *dict; - PyObject *args; - PyObject *message; - PyObject *msg; - PyObject *filename; - PyObject *lineno; - PyObject *offset; - PyObject *text; - PyObject *print_file_and_line; -} PySyntaxErrorObject; - -#ifdef Py_USING_UNICODE -typedef struct { - PyObject_HEAD - PyObject *dict; - PyObject *args; - PyObject *message; - PyObject *encoding; - PyObject *object; - Py_ssize_t start; - Py_ssize_t end; - PyObject *reason; -} PyUnicodeErrorObject; -#endif - -typedef struct { - PyObject_HEAD - PyObject *dict; - PyObject *args; - PyObject *message; - PyObject *code; -} PySystemExitObject; - -typedef struct { - PyObject_HEAD - PyObject *dict; - PyObject *args; - PyObject *message; - PyObject *myerrno; - PyObject *strerror; - PyObject *filename; -} PyEnvironmentErrorObject; - -#ifdef MS_WINDOWS -typedef struct { - PyObject_HEAD - PyObject *dict; - PyObject *args; - PyObject *message; - PyObject *myerrno; - PyObject *strerror; - PyObject *filename; - PyObject *winerror; -} PyWindowsErrorObject; -#endif - -/* Error handling definitions */ - -PyAPI_FUNC(void) PyErr_SetNone(PyObject *); -PyAPI_FUNC(void) PyErr_SetObject(PyObject *, PyObject *); -PyAPI_FUNC(void) PyErr_SetString(PyObject *, const char *); -PyAPI_FUNC(PyObject *) PyErr_Occurred(void); -PyAPI_FUNC(void) PyErr_Clear(void); -PyAPI_FUNC(void) PyErr_Fetch(PyObject **, PyObject **, PyObject **); -PyAPI_FUNC(void) PyErr_Restore(PyObject *, PyObject *, PyObject *); - -#ifdef Py_DEBUG -#define _PyErr_OCCURRED() PyErr_Occurred() -#else -#define _PyErr_OCCURRED() (_PyThreadState_Current->curexc_type) -#endif - -/* Error testing and normalization */ -PyAPI_FUNC(int) PyErr_GivenExceptionMatches(PyObject *, PyObject *); -PyAPI_FUNC(int) PyErr_ExceptionMatches(PyObject *); -PyAPI_FUNC(void) PyErr_NormalizeException(PyObject**, PyObject**, PyObject**); -PyAPI_FUNC(void) _PyErr_ReplaceException(PyObject *, PyObject *, PyObject *); - -/* */ - -#define PyExceptionClass_Check(x) \ - (PyClass_Check((x)) || (PyType_Check((x)) && \ - PyType_FastSubclass((PyTypeObject*)(x), Py_TPFLAGS_BASE_EXC_SUBCLASS))) - -#define PyExceptionInstance_Check(x) \ - (PyInstance_Check((x)) || \ - PyType_FastSubclass((x)->ob_type, Py_TPFLAGS_BASE_EXC_SUBCLASS)) - -#define PyExceptionClass_Name(x) \ - (PyClass_Check((x)) \ - ? PyString_AS_STRING(((PyClassObject*)(x))->cl_name) \ - : (char *)(((PyTypeObject*)(x))->tp_name)) - -#define PyExceptionInstance_Class(x) \ - ((PyInstance_Check((x)) \ - ? (PyObject*)((PyInstanceObject*)(x))->in_class \ - : (PyObject*)((x)->ob_type))) - - -/* Predefined exceptions */ - -PyAPI_DATA(PyObject *) PyExc_BaseException; -PyAPI_DATA(PyObject *) PyExc_Exception; -PyAPI_DATA(PyObject *) PyExc_StopIteration; -PyAPI_DATA(PyObject *) PyExc_GeneratorExit; -PyAPI_DATA(PyObject *) PyExc_StandardError; -PyAPI_DATA(PyObject *) PyExc_ArithmeticError; -PyAPI_DATA(PyObject *) PyExc_LookupError; - -PyAPI_DATA(PyObject *) PyExc_AssertionError; -PyAPI_DATA(PyObject *) PyExc_AttributeError; -PyAPI_DATA(PyObject *) PyExc_EOFError; -PyAPI_DATA(PyObject *) PyExc_FloatingPointError; -PyAPI_DATA(PyObject *) PyExc_EnvironmentError; -PyAPI_DATA(PyObject *) PyExc_IOError; -PyAPI_DATA(PyObject *) PyExc_OSError; -PyAPI_DATA(PyObject *) PyExc_ImportError; -PyAPI_DATA(PyObject *) PyExc_IndexError; -PyAPI_DATA(PyObject *) PyExc_KeyError; -PyAPI_DATA(PyObject *) PyExc_KeyboardInterrupt; -PyAPI_DATA(PyObject *) PyExc_MemoryError; -PyAPI_DATA(PyObject *) PyExc_NameError; -PyAPI_DATA(PyObject *) PyExc_OverflowError; -PyAPI_DATA(PyObject *) PyExc_RuntimeError; -PyAPI_DATA(PyObject *) PyExc_NotImplementedError; -PyAPI_DATA(PyObject *) PyExc_SyntaxError; -PyAPI_DATA(PyObject *) PyExc_IndentationError; -PyAPI_DATA(PyObject *) PyExc_TabError; -PyAPI_DATA(PyObject *) PyExc_ReferenceError; -PyAPI_DATA(PyObject *) PyExc_SystemError; -PyAPI_DATA(PyObject *) PyExc_SystemExit; -PyAPI_DATA(PyObject *) PyExc_TypeError; -PyAPI_DATA(PyObject *) PyExc_UnboundLocalError; -PyAPI_DATA(PyObject *) PyExc_UnicodeError; -PyAPI_DATA(PyObject *) PyExc_UnicodeEncodeError; -PyAPI_DATA(PyObject *) PyExc_UnicodeDecodeError; -PyAPI_DATA(PyObject *) PyExc_UnicodeTranslateError; -PyAPI_DATA(PyObject *) PyExc_ValueError; -PyAPI_DATA(PyObject *) PyExc_ZeroDivisionError; -#ifdef MS_WINDOWS -PyAPI_DATA(PyObject *) PyExc_WindowsError; -#endif -#ifdef __VMS -PyAPI_DATA(PyObject *) PyExc_VMSError; -#endif - -PyAPI_DATA(PyObject *) PyExc_BufferError; - -PyAPI_DATA(PyObject *) PyExc_MemoryErrorInst; -PyAPI_DATA(PyObject *) PyExc_RecursionErrorInst; - -/* Predefined warning categories */ -PyAPI_DATA(PyObject *) PyExc_Warning; -PyAPI_DATA(PyObject *) PyExc_UserWarning; -PyAPI_DATA(PyObject *) PyExc_DeprecationWarning; -PyAPI_DATA(PyObject *) PyExc_PendingDeprecationWarning; -PyAPI_DATA(PyObject *) PyExc_SyntaxWarning; -PyAPI_DATA(PyObject *) PyExc_RuntimeWarning; -PyAPI_DATA(PyObject *) PyExc_FutureWarning; -PyAPI_DATA(PyObject *) PyExc_ImportWarning; -PyAPI_DATA(PyObject *) PyExc_UnicodeWarning; -PyAPI_DATA(PyObject *) PyExc_BytesWarning; - - -/* Convenience functions */ - -PyAPI_FUNC(int) PyErr_BadArgument(void); -PyAPI_FUNC(PyObject *) PyErr_NoMemory(void); -PyAPI_FUNC(PyObject *) PyErr_SetFromErrno(PyObject *); -PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithFilenameObject( - PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithFilename( - PyObject *, const char *); -#ifdef MS_WINDOWS -PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithUnicodeFilename( - PyObject *, const Py_UNICODE *); -#endif /* MS_WINDOWS */ - -PyAPI_FUNC(PyObject *) PyErr_Format(PyObject *, const char *, ...) - Py_GCC_ATTRIBUTE((format(printf, 2, 3))); - -#ifdef MS_WINDOWS -PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilenameObject( - int, const char *); -PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilename( - int, const char *); -PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithUnicodeFilename( - int, const Py_UNICODE *); -PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErr(int); -PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithFilenameObject( - PyObject *,int, PyObject *); -PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithFilename( - PyObject *,int, const char *); -PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithUnicodeFilename( - PyObject *,int, const Py_UNICODE *); -PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErr(PyObject *, int); -#endif /* MS_WINDOWS */ - -/* Export the old function so that the existing API remains available: */ -PyAPI_FUNC(void) PyErr_BadInternalCall(void); -PyAPI_FUNC(void) _PyErr_BadInternalCall(const char *filename, int lineno); -/* Mask the old API with a call to the new API for code compiled under - Python 2.0: */ -#define PyErr_BadInternalCall() _PyErr_BadInternalCall(__FILE__, __LINE__) - -/* Function to create a new exception */ -PyAPI_FUNC(PyObject *) PyErr_NewException( - char *name, PyObject *base, PyObject *dict); -PyAPI_FUNC(PyObject *) PyErr_NewExceptionWithDoc( - char *name, char *doc, PyObject *base, PyObject *dict); -PyAPI_FUNC(void) PyErr_WriteUnraisable(PyObject *); - -/* In sigcheck.c or signalmodule.c */ -PyAPI_FUNC(int) PyErr_CheckSignals(void); -PyAPI_FUNC(void) PyErr_SetInterrupt(void); - -/* In signalmodule.c */ -int PySignal_SetWakeupFd(int fd); - -/* Support for adding program text to SyntaxErrors */ -PyAPI_FUNC(void) PyErr_SyntaxLocation(const char *, int); -PyAPI_FUNC(PyObject *) PyErr_ProgramText(const char *, int); - -#ifdef Py_USING_UNICODE -/* The following functions are used to create and modify unicode - exceptions from C */ - -/* create a UnicodeDecodeError object */ -PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_Create( - const char *, const char *, Py_ssize_t, Py_ssize_t, Py_ssize_t, const char *); - -/* create a UnicodeEncodeError object */ -PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_Create( - const char *, const Py_UNICODE *, Py_ssize_t, Py_ssize_t, Py_ssize_t, const char *); - -/* create a UnicodeTranslateError object */ -PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_Create( - const Py_UNICODE *, Py_ssize_t, Py_ssize_t, Py_ssize_t, const char *); - -/* get the encoding attribute */ -PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetEncoding(PyObject *); -PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetEncoding(PyObject *); - -/* get the object attribute */ -PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetObject(PyObject *); -PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetObject(PyObject *); -PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetObject(PyObject *); - -/* get the value of the start attribute (the int * may not be NULL) - return 0 on success, -1 on failure */ -PyAPI_FUNC(int) PyUnicodeEncodeError_GetStart(PyObject *, Py_ssize_t *); -PyAPI_FUNC(int) PyUnicodeDecodeError_GetStart(PyObject *, Py_ssize_t *); -PyAPI_FUNC(int) PyUnicodeTranslateError_GetStart(PyObject *, Py_ssize_t *); - -/* assign a new value to the start attribute - return 0 on success, -1 on failure */ -PyAPI_FUNC(int) PyUnicodeEncodeError_SetStart(PyObject *, Py_ssize_t); -PyAPI_FUNC(int) PyUnicodeDecodeError_SetStart(PyObject *, Py_ssize_t); -PyAPI_FUNC(int) PyUnicodeTranslateError_SetStart(PyObject *, Py_ssize_t); - -/* get the value of the end attribute (the int *may not be NULL) - return 0 on success, -1 on failure */ -PyAPI_FUNC(int) PyUnicodeEncodeError_GetEnd(PyObject *, Py_ssize_t *); -PyAPI_FUNC(int) PyUnicodeDecodeError_GetEnd(PyObject *, Py_ssize_t *); -PyAPI_FUNC(int) PyUnicodeTranslateError_GetEnd(PyObject *, Py_ssize_t *); - -/* assign a new value to the end attribute - return 0 on success, -1 on failure */ -PyAPI_FUNC(int) PyUnicodeEncodeError_SetEnd(PyObject *, Py_ssize_t); -PyAPI_FUNC(int) PyUnicodeDecodeError_SetEnd(PyObject *, Py_ssize_t); -PyAPI_FUNC(int) PyUnicodeTranslateError_SetEnd(PyObject *, Py_ssize_t); - -/* get the value of the reason attribute */ -PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetReason(PyObject *); -PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetReason(PyObject *); -PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetReason(PyObject *); - -/* assign a new value to the reason attribute - return 0 on success, -1 on failure */ -PyAPI_FUNC(int) PyUnicodeEncodeError_SetReason( - PyObject *, const char *); -PyAPI_FUNC(int) PyUnicodeDecodeError_SetReason( - PyObject *, const char *); -PyAPI_FUNC(int) PyUnicodeTranslateError_SetReason( - PyObject *, const char *); -#endif - - -/* These APIs aren't really part of the error implementation, but - often needed to format error messages; the native C lib APIs are - not available on all platforms, which is why we provide emulations - for those platforms in Python/mysnprintf.c, - WARNING: The return value of snprintf varies across platforms; do - not rely on any particular behavior; eventually the C99 defn may - be reliable. -*/ -#if defined(MS_WIN32) && !defined(HAVE_SNPRINTF) -# define HAVE_SNPRINTF -# define snprintf _snprintf -# define vsnprintf _vsnprintf -#endif - -#include <stdarg.h> -PyAPI_FUNC(int) PyOS_snprintf(char *str, size_t size, const char *format, ...) - Py_GCC_ATTRIBUTE((format(printf, 3, 4))); -PyAPI_FUNC(int) PyOS_vsnprintf(char *str, size_t size, const char *format, va_list va) - Py_GCC_ATTRIBUTE((format(printf, 3, 0))); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_ERRORS_H */
--- a/test/include/python2.7/pyexpat.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -/* Stuff to export relevant 'expat' entry points from pyexpat to other - * parser modules, such as cElementTree. */ - -/* note: you must import expat.h before importing this module! */ - -#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.0" -#define PyExpat_CAPSULE_NAME "pyexpat.expat_CAPI" - -struct PyExpat_CAPI -{ - char* magic; /* set to PyExpat_CAPI_MAGIC */ - int size; /* set to sizeof(struct PyExpat_CAPI) */ - int MAJOR_VERSION; - int MINOR_VERSION; - int MICRO_VERSION; - /* pointers to selected expat functions. add new functions at - the end, if needed */ - const XML_LChar * (*ErrorString)(enum XML_Error code); - enum XML_Error (*GetErrorCode)(XML_Parser parser); - XML_Size (*GetErrorColumnNumber)(XML_Parser parser); - XML_Size (*GetErrorLineNumber)(XML_Parser parser); - enum XML_Status (*Parse)( - XML_Parser parser, const char *s, int len, int isFinal); - XML_Parser (*ParserCreate_MM)( - const XML_Char *encoding, const XML_Memory_Handling_Suite *memsuite, - const XML_Char *namespaceSeparator); - void (*ParserFree)(XML_Parser parser); - void (*SetCharacterDataHandler)( - XML_Parser parser, XML_CharacterDataHandler handler); - void (*SetCommentHandler)( - XML_Parser parser, XML_CommentHandler handler); - void (*SetDefaultHandlerExpand)( - XML_Parser parser, XML_DefaultHandler handler); - void (*SetElementHandler)( - XML_Parser parser, XML_StartElementHandler start, - XML_EndElementHandler end); - void (*SetNamespaceDeclHandler)( - XML_Parser parser, XML_StartNamespaceDeclHandler start, - XML_EndNamespaceDeclHandler end); - void (*SetProcessingInstructionHandler)( - XML_Parser parser, XML_ProcessingInstructionHandler handler); - void (*SetUnknownEncodingHandler)( - XML_Parser parser, XML_UnknownEncodingHandler handler, - void *encodingHandlerData); - void (*SetUserData)(XML_Parser parser, void *userData); - /* always add new stuff to the end! */ -}; -
--- a/test/include/python2.7/pyfpe.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,176 +0,0 @@ -#ifndef Py_PYFPE_H -#define Py_PYFPE_H -#ifdef __cplusplus -extern "C" { -#endif -/* - --------------------------------------------------------------------- - / Copyright (c) 1996. \ - | The Regents of the University of California. | - | All rights reserved. | - | | - | Permission to use, copy, modify, and distribute this software for | - | any purpose without fee is hereby granted, provided that this en- | - | tire notice is included in all copies of any software which is or | - | includes a copy or modification of this software and in all | - | copies of the supporting documentation for such software. | - | | - | This work was produced at the University of California, Lawrence | - | Livermore National Laboratory under contract no. W-7405-ENG-48 | - | between the U.S. Department of Energy and The Regents of the | - | University of California for the operation of UC LLNL. | - | | - | DISCLAIMER | - | | - | This software was prepared as an account of work sponsored by an | - | agency of the United States Government. Neither the United States | - | Government nor the University of California nor any of their em- | - | ployees, makes any warranty, express or implied, or assumes any | - | liability or responsibility for the accuracy, completeness, or | - | usefulness of any information, apparatus, product, or process | - | disclosed, or represents that its use would not infringe | - | privately-owned rights. Reference herein to any specific commer- | - | cial products, process, or service by trade name, trademark, | - | manufacturer, or otherwise, does not necessarily constitute or | - | imply its endorsement, recommendation, or favoring by the United | - | States Government or the University of California. The views and | - | opinions of authors expressed herein do not necessarily state or | - | reflect those of the United States Government or the University | - | of California, and shall not be used for advertising or product | - \ endorsement purposes. / - --------------------------------------------------------------------- -*/ - -/* - * Define macros for handling SIGFPE. - * Lee Busby, LLNL, November, 1996 - * busby1@llnl.gov - * - ********************************************* - * Overview of the system for handling SIGFPE: - * - * This file (Include/pyfpe.h) defines a couple of "wrapper" macros for - * insertion into your Python C code of choice. Their proper use is - * discussed below. The file Python/pyfpe.c defines a pair of global - * variables PyFPE_jbuf and PyFPE_counter which are used by the signal - * handler for SIGFPE to decide if a particular exception was protected - * by the macros. The signal handler itself, and code for enabling the - * generation of SIGFPE in the first place, is in a (new) Python module - * named fpectl. This module is standard in every respect. It can be loaded - * either statically or dynamically as you choose, and like any other - * Python module, has no effect until you import it. - * - * In the general case, there are three steps toward handling SIGFPE in any - * Python code: - * - * 1) Add the *_PROTECT macros to your C code as required to protect - * dangerous floating point sections. - * - * 2) Turn on the inclusion of the code by adding the ``--with-fpectl'' - * flag at the time you run configure. If the fpectl or other modules - * which use the *_PROTECT macros are to be dynamically loaded, be - * sure they are compiled with WANT_SIGFPE_HANDLER defined. - * - * 3) When python is built and running, import fpectl, and execute - * fpectl.turnon_sigfpe(). This sets up the signal handler and enables - * generation of SIGFPE whenever an exception occurs. From this point - * on, any properly trapped SIGFPE should result in the Python - * FloatingPointError exception. - * - * Step 1 has been done already for the Python kernel code, and should be - * done soon for the NumPy array package. Step 2 is usually done once at - * python install time. Python's behavior with respect to SIGFPE is not - * changed unless you also do step 3. Thus you can control this new - * facility at compile time, or run time, or both. - * - ******************************** - * Using the macros in your code: - * - * static PyObject *foobar(PyObject *self,PyObject *args) - * { - * .... - * PyFPE_START_PROTECT("Error in foobar", return 0) - * result = dangerous_op(somearg1, somearg2, ...); - * PyFPE_END_PROTECT(result) - * .... - * } - * - * If a floating point error occurs in dangerous_op, foobar returns 0 (NULL), - * after setting the associated value of the FloatingPointError exception to - * "Error in foobar". ``Dangerous_op'' can be a single operation, or a block - * of code, function calls, or any combination, so long as no alternate - * return is possible before the PyFPE_END_PROTECT macro is reached. - * - * The macros can only be used in a function context where an error return - * can be recognized as signaling a Python exception. (Generally, most - * functions that return a PyObject * will qualify.) - * - * Guido's original design suggestion for PyFPE_START_PROTECT and - * PyFPE_END_PROTECT had them open and close a local block, with a locally - * defined jmp_buf and jmp_buf pointer. This would allow recursive nesting - * of the macros. The Ansi C standard makes it clear that such local - * variables need to be declared with the "volatile" type qualifier to keep - * setjmp from corrupting their values. Some current implementations seem - * to be more restrictive. For example, the HPUX man page for setjmp says - * - * Upon the return from a setjmp() call caused by a longjmp(), the - * values of any non-static local variables belonging to the routine - * from which setjmp() was called are undefined. Code which depends on - * such values is not guaranteed to be portable. - * - * I therefore decided on a more limited form of nesting, using a counter - * variable (PyFPE_counter) to keep track of any recursion. If an exception - * occurs in an ``inner'' pair of macros, the return will apparently - * come from the outermost level. - * - */ - -#ifdef WANT_SIGFPE_HANDLER -#include <signal.h> -#include <setjmp.h> -#include <math.h> -extern jmp_buf PyFPE_jbuf; -extern int PyFPE_counter; -extern double PyFPE_dummy(void *); - -#define PyFPE_START_PROTECT(err_string, leave_stmt) \ -if (!PyFPE_counter++ && setjmp(PyFPE_jbuf)) { \ - PyErr_SetString(PyExc_FloatingPointError, err_string); \ - PyFPE_counter = 0; \ - leave_stmt; \ -} - -/* - * This (following) is a heck of a way to decrement a counter. However, - * unless the macro argument is provided, code optimizers will sometimes move - * this statement so that it gets executed *before* the unsafe expression - * which we're trying to protect. That pretty well messes things up, - * of course. - * - * If the expression(s) you're trying to protect don't happen to return a - * value, you will need to manufacture a dummy result just to preserve the - * correct ordering of statements. Note that the macro passes the address - * of its argument (so you need to give it something which is addressable). - * If your expression returns multiple results, pass the last such result - * to PyFPE_END_PROTECT. - * - * Note that PyFPE_dummy returns a double, which is cast to int. - * This seeming insanity is to tickle the Floating Point Unit (FPU). - * If an exception has occurred in a preceding floating point operation, - * some architectures (notably Intel 80x86) will not deliver the interrupt - * until the *next* floating point operation. This is painful if you've - * already decremented PyFPE_counter. - */ -#define PyFPE_END_PROTECT(v) PyFPE_counter -= (int)PyFPE_dummy(&(v)); - -#else - -#define PyFPE_START_PROTECT(err_string, leave_stmt) -#define PyFPE_END_PROTECT(v) - -#endif - -#ifdef __cplusplus -} -#endif -#endif /* !Py_PYFPE_H */
--- a/test/include/python2.7/pygetopt.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,18 +0,0 @@ - -#ifndef Py_PYGETOPT_H -#define Py_PYGETOPT_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_DATA(int) _PyOS_opterr; -PyAPI_DATA(int) _PyOS_optind; -PyAPI_DATA(char *) _PyOS_optarg; - -PyAPI_FUNC(void) _PyOS_ResetGetOpt(void); -PyAPI_FUNC(int) _PyOS_GetOpt(int argc, char **argv, char *optstring); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_PYGETOPT_H */
--- a/test/include/python2.7/pymacconfig.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,102 +0,0 @@ -#ifndef PYMACCONFIG_H -#define PYMACCONFIG_H - /* - * This file moves some of the autoconf magic to compile-time - * when building on MacOSX. This is needed for building 4-way - * universal binaries and for 64-bit universal binaries because - * the values redefined below aren't configure-time constant but - * only compile-time constant in these scenarios. - */ - -#if defined(__APPLE__) - -# undef SIZEOF_LONG -# undef SIZEOF_PTHREAD_T -# undef SIZEOF_SIZE_T -# undef SIZEOF_TIME_T -# undef SIZEOF_VOID_P -# undef SIZEOF__BOOL -# undef SIZEOF_UINTPTR_T -# undef SIZEOF_PTHREAD_T -# undef WORDS_BIGENDIAN -# undef DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754 -# undef DOUBLE_IS_BIG_ENDIAN_IEEE754 -# undef DOUBLE_IS_LITTLE_ENDIAN_IEEE754 -# undef HAVE_GCC_ASM_FOR_X87 - -# undef VA_LIST_IS_ARRAY -# if defined(__LP64__) && defined(__x86_64__) -# define VA_LIST_IS_ARRAY 1 -# endif - -# undef HAVE_LARGEFILE_SUPPORT -# ifndef __LP64__ -# define HAVE_LARGEFILE_SUPPORT 1 -# endif - -# undef SIZEOF_LONG -# ifdef __LP64__ -# define SIZEOF__BOOL 1 -# define SIZEOF__BOOL 1 -# define SIZEOF_LONG 8 -# define SIZEOF_PTHREAD_T 8 -# define SIZEOF_SIZE_T 8 -# define SIZEOF_TIME_T 8 -# define SIZEOF_VOID_P 8 -# define SIZEOF_UINTPTR_T 8 -# define SIZEOF_PTHREAD_T 8 -# else -# ifdef __ppc__ -# define SIZEOF__BOOL 4 -# else -# define SIZEOF__BOOL 1 -# endif -# define SIZEOF_LONG 4 -# define SIZEOF_PTHREAD_T 4 -# define SIZEOF_SIZE_T 4 -# define SIZEOF_TIME_T 4 -# define SIZEOF_VOID_P 4 -# define SIZEOF_UINTPTR_T 4 -# define SIZEOF_PTHREAD_T 4 -# endif - -# if defined(__LP64__) - /* MacOSX 10.4 (the first release to support 64-bit code - * at all) only supports 64-bit in the UNIX layer. - * Therefore surpress the toolbox-glue in 64-bit mode. - */ - - /* In 64-bit mode setpgrp always has no arguments, in 32-bit - * mode that depends on the compilation environment - */ -# undef SETPGRP_HAVE_ARG - -# endif - -#ifdef __BIG_ENDIAN__ -#define WORDS_BIGENDIAN 1 -#define DOUBLE_IS_BIG_ENDIAN_IEEE754 -#else -#define DOUBLE_IS_LITTLE_ENDIAN_IEEE754 -#endif /* __BIG_ENDIAN */ - -#ifdef __i386__ -# define HAVE_GCC_ASM_FOR_X87 -#endif - - /* - * The definition in pyconfig.h is only valid on the OS release - * where configure ran on and not necessarily for all systems where - * the executable can be used on. - * - * Specifically: OSX 10.4 has limited supported for '%zd', while - * 10.5 has full support for '%zd'. A binary built on 10.5 won't - * work properly on 10.4 unless we surpress the definition - * of PY_FORMAT_SIZE_T - */ -#undef PY_FORMAT_SIZE_T - - -#endif /* defined(_APPLE__) */ - -#endif /* PYMACCONFIG_H */
--- a/test/include/python2.7/pymactoolbox.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,217 +0,0 @@ -/* -** pymactoolbox.h - globals defined in mactoolboxglue.c -*/ -#ifndef Py_PYMACTOOLBOX_H -#define Py_PYMACTOOLBOX_H -#ifdef __cplusplus - extern "C" { -#endif - -#include <Carbon/Carbon.h> - -#ifndef __LP64__ -#include <QuickTime/QuickTime.h> -#endif /* !__LP64__ */ - -/* -** Helper routines for error codes and such. -*/ -char *PyMac_StrError(int); /* strerror with mac errors */ -extern PyObject *PyMac_OSErrException; /* Exception for OSErr */ -PyObject *PyMac_GetOSErrException(void); /* Initialize & return it */ -PyObject *PyErr_Mac(PyObject *, int); /* Exception with a mac error */ -PyObject *PyMac_Error(OSErr); /* Uses PyMac_GetOSErrException */ -#ifndef __LP64__ -extern OSErr PyMac_GetFullPathname(FSSpec *, char *, int); /* convert - fsspec->path */ -#endif /* __LP64__ */ - -/* -** These conversion routines are defined in mactoolboxglue.c itself. -*/ -int PyMac_GetOSType(PyObject *, OSType *); /* argument parser for OSType */ -PyObject *PyMac_BuildOSType(OSType); /* Convert OSType to PyObject */ - -PyObject *PyMac_BuildNumVersion(NumVersion);/* Convert NumVersion to PyObject */ - -int PyMac_GetStr255(PyObject *, Str255); /* argument parser for Str255 */ -PyObject *PyMac_BuildStr255(Str255); /* Convert Str255 to PyObject */ -PyObject *PyMac_BuildOptStr255(Str255); /* Convert Str255 to PyObject, - NULL to None */ - -int PyMac_GetRect(PyObject *, Rect *); /* argument parser for Rect */ -PyObject *PyMac_BuildRect(Rect *); /* Convert Rect to PyObject */ - -int PyMac_GetPoint(PyObject *, Point *); /* argument parser for Point */ -PyObject *PyMac_BuildPoint(Point); /* Convert Point to PyObject */ - -int PyMac_GetEventRecord(PyObject *, EventRecord *); /* argument parser for - EventRecord */ -PyObject *PyMac_BuildEventRecord(EventRecord *); /* Convert EventRecord to - PyObject */ - -int PyMac_GetFixed(PyObject *, Fixed *); /* argument parser for Fixed */ -PyObject *PyMac_BuildFixed(Fixed); /* Convert Fixed to PyObject */ -int PyMac_Getwide(PyObject *, wide *); /* argument parser for wide */ -PyObject *PyMac_Buildwide(wide *); /* Convert wide to PyObject */ - -/* -** The rest of the routines are implemented by extension modules. If they are -** dynamically loaded mactoolboxglue will contain a stub implementation of the -** routine, which imports the module, whereupon the module's init routine will -** communicate the routine pointer back to the stub. -** If USE_TOOLBOX_OBJECT_GLUE is not defined there is no glue code, and the -** extension modules simply declare the routine. This is the case for static -** builds (and could be the case for MacPython CFM builds, because CFM extension -** modules can reference each other without problems). -*/ - -#ifdef USE_TOOLBOX_OBJECT_GLUE -/* -** These macros are used in the module init code. If we use toolbox object glue -** it sets the function pointer to point to the real function. -*/ -#define PyMac_INIT_TOOLBOX_OBJECT_NEW(object, rtn) { \ - extern PyObject *(*PyMacGluePtr_##rtn)(object); \ - PyMacGluePtr_##rtn = _##rtn; \ -} -#define PyMac_INIT_TOOLBOX_OBJECT_CONVERT(object, rtn) { \ - extern int (*PyMacGluePtr_##rtn)(PyObject *, object *); \ - PyMacGluePtr_##rtn = _##rtn; \ -} -#else -/* -** If we don't use toolbox object glue the init macros are empty. Moreover, we define -** _xxx_New to be the same as xxx_New, and the code in mactoolboxglue isn't included. -*/ -#define PyMac_INIT_TOOLBOX_OBJECT_NEW(object, rtn) -#define PyMac_INIT_TOOLBOX_OBJECT_CONVERT(object, rtn) -#endif /* USE_TOOLBOX_OBJECT_GLUE */ - -/* macfs exports */ -#ifndef __LP64__ -int PyMac_GetFSSpec(PyObject *, FSSpec *); /* argument parser for FSSpec */ -PyObject *PyMac_BuildFSSpec(FSSpec *); /* Convert FSSpec to PyObject */ -#endif /* !__LP64__ */ - -int PyMac_GetFSRef(PyObject *, FSRef *); /* argument parser for FSRef */ -PyObject *PyMac_BuildFSRef(FSRef *); /* Convert FSRef to PyObject */ - -/* AE exports */ -extern PyObject *AEDesc_New(AppleEvent *); /* XXXX Why passed by address?? */ -extern PyObject *AEDesc_NewBorrowed(AppleEvent *); -extern int AEDesc_Convert(PyObject *, AppleEvent *); - -/* Cm exports */ -extern PyObject *CmpObj_New(Component); -extern int CmpObj_Convert(PyObject *, Component *); -extern PyObject *CmpInstObj_New(ComponentInstance); -extern int CmpInstObj_Convert(PyObject *, ComponentInstance *); - -/* Ctl exports */ -#ifndef __LP64__ -extern PyObject *CtlObj_New(ControlHandle); -extern int CtlObj_Convert(PyObject *, ControlHandle *); -#endif /* !__LP64__ */ - -/* Dlg exports */ -#ifndef __LP64__ -extern PyObject *DlgObj_New(DialogPtr); -extern int DlgObj_Convert(PyObject *, DialogPtr *); -extern PyObject *DlgObj_WhichDialog(DialogPtr); -#endif /* !__LP64__ */ - -/* Drag exports */ -#ifndef __LP64__ -extern PyObject *DragObj_New(DragReference); -extern int DragObj_Convert(PyObject *, DragReference *); -#endif /* !__LP64__ */ - -/* List exports */ -#ifndef __LP64__ -extern PyObject *ListObj_New(ListHandle); -extern int ListObj_Convert(PyObject *, ListHandle *); -#endif /* !__LP64__ */ - -/* Menu exports */ -#ifndef __LP64__ -extern PyObject *MenuObj_New(MenuHandle); -extern int MenuObj_Convert(PyObject *, MenuHandle *); -#endif /* !__LP64__ */ - -/* Qd exports */ -#ifndef __LP64__ -extern PyObject *GrafObj_New(GrafPtr); -extern int GrafObj_Convert(PyObject *, GrafPtr *); -extern PyObject *BMObj_New(BitMapPtr); -extern int BMObj_Convert(PyObject *, BitMapPtr *); -extern PyObject *QdRGB_New(RGBColor *); -extern int QdRGB_Convert(PyObject *, RGBColor *); -#endif /* !__LP64__ */ - -/* Qdoffs exports */ -#ifndef __LP64__ -extern PyObject *GWorldObj_New(GWorldPtr); -extern int GWorldObj_Convert(PyObject *, GWorldPtr *); -#endif /* !__LP64__ */ - -/* Qt exports */ -#ifndef __LP64__ -extern PyObject *TrackObj_New(Track); -extern int TrackObj_Convert(PyObject *, Track *); -extern PyObject *MovieObj_New(Movie); -extern int MovieObj_Convert(PyObject *, Movie *); -extern PyObject *MovieCtlObj_New(MovieController); -extern int MovieCtlObj_Convert(PyObject *, MovieController *); -extern PyObject *TimeBaseObj_New(TimeBase); -extern int TimeBaseObj_Convert(PyObject *, TimeBase *); -extern PyObject *UserDataObj_New(UserData); -extern int UserDataObj_Convert(PyObject *, UserData *); -extern PyObject *MediaObj_New(Media); -extern int MediaObj_Convert(PyObject *, Media *); -#endif /* !__LP64__ */ - -/* Res exports */ -extern PyObject *ResObj_New(Handle); -extern int ResObj_Convert(PyObject *, Handle *); -extern PyObject *OptResObj_New(Handle); -extern int OptResObj_Convert(PyObject *, Handle *); - -/* TE exports */ -#ifndef __LP64__ -extern PyObject *TEObj_New(TEHandle); -extern int TEObj_Convert(PyObject *, TEHandle *); -#endif /* !__LP64__ */ - -/* Win exports */ -#ifndef __LP64__ -extern PyObject *WinObj_New(WindowPtr); -extern int WinObj_Convert(PyObject *, WindowPtr *); -extern PyObject *WinObj_WhichWindow(WindowPtr); -#endif /* !__LP64__ */ - -/* CF exports */ -extern PyObject *CFObj_New(CFTypeRef); -extern int CFObj_Convert(PyObject *, CFTypeRef *); -extern PyObject *CFTypeRefObj_New(CFTypeRef); -extern int CFTypeRefObj_Convert(PyObject *, CFTypeRef *); -extern PyObject *CFStringRefObj_New(CFStringRef); -extern int CFStringRefObj_Convert(PyObject *, CFStringRef *); -extern PyObject *CFMutableStringRefObj_New(CFMutableStringRef); -extern int CFMutableStringRefObj_Convert(PyObject *, CFMutableStringRef *); -extern PyObject *CFArrayRefObj_New(CFArrayRef); -extern int CFArrayRefObj_Convert(PyObject *, CFArrayRef *); -extern PyObject *CFMutableArrayRefObj_New(CFMutableArrayRef); -extern int CFMutableArrayRefObj_Convert(PyObject *, CFMutableArrayRef *); -extern PyObject *CFDictionaryRefObj_New(CFDictionaryRef); -extern int CFDictionaryRefObj_Convert(PyObject *, CFDictionaryRef *); -extern PyObject *CFMutableDictionaryRefObj_New(CFMutableDictionaryRef); -extern int CFMutableDictionaryRefObj_Convert(PyObject *, CFMutableDictionaryRef *); -extern PyObject *CFURLRefObj_New(CFURLRef); -extern int CFURLRefObj_Convert(PyObject *, CFURLRef *); -extern int OptionalCFURLRefObj_Convert(PyObject *, CFURLRef *); - -#ifdef __cplusplus - } -#endif -#endif
--- a/test/include/python2.7/pymath.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,214 +0,0 @@ -#ifndef Py_PYMATH_H -#define Py_PYMATH_H - -#include "pyconfig.h" /* include for defines */ - -/************************************************************************** -Symbols and macros to supply platform-independent interfaces to mathematical -functions and constants -**************************************************************************/ - -/* Python provides implementations for copysign, round and hypot in - * Python/pymath.c just in case your math library doesn't provide the - * functions. - * - *Note: PC/pyconfig.h defines copysign as _copysign - */ -#ifndef HAVE_COPYSIGN -extern double copysign(double, double); -#endif - -#ifndef HAVE_ROUND -extern double round(double); -#endif - -#ifndef HAVE_HYPOT -extern double hypot(double, double); -#endif - -/* extra declarations */ -#ifndef _MSC_VER -#ifndef __STDC__ -extern double fmod (double, double); -extern double frexp (double, int *); -extern double ldexp (double, int); -extern double modf (double, double *); -extern double pow(double, double); -#endif /* __STDC__ */ -#endif /* _MSC_VER */ - -#ifdef _OSF_SOURCE -/* OSF1 5.1 doesn't make these available with XOPEN_SOURCE_EXTENDED defined */ -extern int finite(double); -extern double copysign(double, double); -#endif - -/* High precision defintion of pi and e (Euler) - * The values are taken from libc6's math.h. - */ -#ifndef Py_MATH_PIl -#define Py_MATH_PIl 3.1415926535897932384626433832795029L -#endif -#ifndef Py_MATH_PI -#define Py_MATH_PI 3.14159265358979323846 -#endif - -#ifndef Py_MATH_El -#define Py_MATH_El 2.7182818284590452353602874713526625L -#endif - -#ifndef Py_MATH_E -#define Py_MATH_E 2.7182818284590452354 -#endif - -/* On x86, Py_FORCE_DOUBLE forces a floating-point number out of an x87 FPU - register and into a 64-bit memory location, rounding from extended - precision to double precision in the process. On other platforms it does - nothing. */ - -/* we take double rounding as evidence of x87 usage */ -#ifndef Py_FORCE_DOUBLE -# ifdef X87_DOUBLE_ROUNDING -PyAPI_FUNC(double) _Py_force_double(double); -# define Py_FORCE_DOUBLE(X) (_Py_force_double(X)) -# else -# define Py_FORCE_DOUBLE(X) (X) -# endif -#endif - -#ifdef HAVE_GCC_ASM_FOR_X87 -PyAPI_FUNC(unsigned short) _Py_get_387controlword(void); -PyAPI_FUNC(void) _Py_set_387controlword(unsigned short); -#endif - -/* Py_IS_NAN(X) - * Return 1 if float or double arg is a NaN, else 0. - * Caution: - * X is evaluated more than once. - * This may not work on all platforms. Each platform has *some* - * way to spell this, though -- override in pyconfig.h if you have - * a platform where it doesn't work. - * Note: PC/pyconfig.h defines Py_IS_NAN as _isnan - */ -#ifndef Py_IS_NAN -#if defined HAVE_DECL_ISNAN && HAVE_DECL_ISNAN == 1 -#define Py_IS_NAN(X) isnan(X) -#else -#define Py_IS_NAN(X) ((X) != (X)) -#endif -#endif - -/* Py_IS_INFINITY(X) - * Return 1 if float or double arg is an infinity, else 0. - * Caution: - * X is evaluated more than once. - * This implementation may set the underflow flag if |X| is very small; - * it really can't be implemented correctly (& easily) before C99. - * Override in pyconfig.h if you have a better spelling on your platform. - * Py_FORCE_DOUBLE is used to avoid getting false negatives from a - * non-infinite value v sitting in an 80-bit x87 register such that - * v becomes infinite when spilled from the register to 64-bit memory. - * Note: PC/pyconfig.h defines Py_IS_INFINITY as _isinf - */ -#ifndef Py_IS_INFINITY -# if defined HAVE_DECL_ISINF && HAVE_DECL_ISINF == 1 -# define Py_IS_INFINITY(X) isinf(X) -# else -# define Py_IS_INFINITY(X) ((X) && \ - (Py_FORCE_DOUBLE(X)*0.5 == Py_FORCE_DOUBLE(X))) -# endif -#endif - -/* Py_IS_FINITE(X) - * Return 1 if float or double arg is neither infinite nor NAN, else 0. - * Some compilers (e.g. VisualStudio) have intrisics for this, so a special - * macro for this particular test is useful - * Note: PC/pyconfig.h defines Py_IS_FINITE as _finite - */ -#ifndef Py_IS_FINITE -#if defined HAVE_DECL_ISFINITE && HAVE_DECL_ISFINITE == 1 -#define Py_IS_FINITE(X) isfinite(X) -#elif defined HAVE_FINITE -#define Py_IS_FINITE(X) finite(X) -#else -#define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) -#endif -#endif - -/* HUGE_VAL is supposed to expand to a positive double infinity. Python - * uses Py_HUGE_VAL instead because some platforms are broken in this - * respect. We used to embed code in pyport.h to try to worm around that, - * but different platforms are broken in conflicting ways. If you're on - * a platform where HUGE_VAL is defined incorrectly, fiddle your Python - * config to #define Py_HUGE_VAL to something that works on your platform. - */ -#ifndef Py_HUGE_VAL -#define Py_HUGE_VAL HUGE_VAL -#endif - -/* Py_NAN - * A value that evaluates to a NaN. On IEEE 754 platforms INF*0 or - * INF/INF works. Define Py_NO_NAN in pyconfig.h if your platform - * doesn't support NaNs. - */ -#if !defined(Py_NAN) && !defined(Py_NO_NAN) -#if !defined(__INTEL_COMPILER) - #define Py_NAN (Py_HUGE_VAL * 0.) -#else /* __INTEL_COMPILER */ - #if defined(ICC_NAN_STRICT) - #pragma float_control(push) - #pragma float_control(precise, on) - #pragma float_control(except, on) - #if defined(_MSC_VER) - __declspec(noinline) - #else /* Linux */ - __attribute__((noinline)) - #endif /* _MSC_VER */ - static double __icc_nan() - { - return sqrt(-1.0); - } - #pragma float_control (pop) - #define Py_NAN __icc_nan() - #else /* ICC_NAN_RELAXED as default for Intel Compiler */ - static union { unsigned char buf[8]; double __icc_nan; } __nan_store = {0,0,0,0,0,0,0xf8,0x7f}; - #define Py_NAN (__nan_store.__icc_nan) - #endif /* ICC_NAN_STRICT */ -#endif /* __INTEL_COMPILER */ -#endif - -/* Py_OVERFLOWED(X) - * Return 1 iff a libm function overflowed. Set errno to 0 before calling - * a libm function, and invoke this macro after, passing the function - * result. - * Caution: - * This isn't reliable. C99 no longer requires libm to set errno under - * any exceptional condition, but does require +- HUGE_VAL return - * values on overflow. A 754 box *probably* maps HUGE_VAL to a - * double infinity, and we're cool if that's so, unless the input - * was an infinity and an infinity is the expected result. A C89 - * system sets errno to ERANGE, so we check for that too. We're - * out of luck if a C99 754 box doesn't map HUGE_VAL to +Inf, or - * if the returned result is a NaN, or if a C89 box returns HUGE_VAL - * in non-overflow cases. - * X is evaluated more than once. - * Some platforms have better way to spell this, so expect some #ifdef'ery. - * - * OpenBSD uses 'isinf()' because a compiler bug on that platform causes - * the longer macro version to be mis-compiled. This isn't optimal, and - * should be removed once a newer compiler is available on that platform. - * The system that had the failure was running OpenBSD 3.2 on Intel, with - * gcc 2.95.3. - * - * According to Tim's checkin, the FreeBSD systems use isinf() to work - * around a FPE bug on that platform. - */ -#if defined(__FreeBSD__) || defined(__OpenBSD__) -#define Py_OVERFLOWED(X) isinf(X) -#else -#define Py_OVERFLOWED(X) ((X) != 0.0 && (errno == ERANGE || \ - (X) == Py_HUGE_VAL || \ - (X) == -Py_HUGE_VAL)) -#endif - -#endif /* Py_PYMATH_H */
--- a/test/include/python2.7/pymem.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,122 +0,0 @@ -/* The PyMem_ family: low-level memory allocation interfaces. - See objimpl.h for the PyObject_ memory family. -*/ - -#ifndef Py_PYMEM_H -#define Py_PYMEM_H - -#include "pyport.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* BEWARE: - - Each interface exports both functions and macros. Extension modules should - use the functions, to ensure binary compatibility across Python versions. - Because the Python implementation is free to change internal details, and - the macros may (or may not) expose details for speed, if you do use the - macros you must recompile your extensions with each Python release. - - Never mix calls to PyMem_ with calls to the platform malloc/realloc/ - calloc/free. For example, on Windows different DLLs may end up using - different heaps, and if you use PyMem_Malloc you'll get the memory from the - heap used by the Python DLL; it could be a disaster if you free()'ed that - directly in your own extension. Using PyMem_Free instead ensures Python - can return the memory to the proper heap. As another example, in - PYMALLOC_DEBUG mode, Python wraps all calls to all PyMem_ and PyObject_ - memory functions in special debugging wrappers that add additional - debugging info to dynamic memory blocks. The system routines have no idea - what to do with that stuff, and the Python wrappers have no idea what to do - with raw blocks obtained directly by the system routines then. - - The GIL must be held when using these APIs. -*/ - -/* - * Raw memory interface - * ==================== - */ - -/* Functions - - Functions supplying platform-independent semantics for malloc/realloc/ - free. These functions make sure that allocating 0 bytes returns a distinct - non-NULL pointer (whenever possible -- if we're flat out of memory, NULL - may be returned), even if the platform malloc and realloc don't. - Returned pointers must be checked for NULL explicitly. No action is - performed on failure (no exception is set, no warning is printed, etc). -*/ - -PyAPI_FUNC(void *) PyMem_Malloc(size_t); -PyAPI_FUNC(void *) PyMem_Realloc(void *, size_t); -PyAPI_FUNC(void) PyMem_Free(void *); - -/* Starting from Python 1.6, the wrappers Py_{Malloc,Realloc,Free} are - no longer supported. They used to call PyErr_NoMemory() on failure. */ - -/* Macros. */ -#ifdef PYMALLOC_DEBUG -/* Redirect all memory operations to Python's debugging allocator. */ -#define PyMem_MALLOC _PyMem_DebugMalloc -#define PyMem_REALLOC _PyMem_DebugRealloc -#define PyMem_FREE _PyMem_DebugFree - -#else /* ! PYMALLOC_DEBUG */ - -/* PyMem_MALLOC(0) means malloc(1). Some systems would return NULL - for malloc(0), which would be treated as an error. Some platforms - would return a pointer with no memory behind it, which would break - pymalloc. To solve these problems, allocate an extra byte. */ -/* Returns NULL to indicate error if a negative size or size larger than - Py_ssize_t can represent is supplied. Helps prevents security holes. */ -#define PyMem_MALLOC(n) ((size_t)(n) > (size_t)PY_SSIZE_T_MAX ? NULL \ - : malloc((n) ? (n) : 1)) -#define PyMem_REALLOC(p, n) ((size_t)(n) > (size_t)PY_SSIZE_T_MAX ? NULL \ - : realloc((p), (n) ? (n) : 1)) -#define PyMem_FREE free - -#endif /* PYMALLOC_DEBUG */ - -/* - * Type-oriented memory interface - * ============================== - * - * Allocate memory for n objects of the given type. Returns a new pointer - * or NULL if the request was too large or memory allocation failed. Use - * these macros rather than doing the multiplication yourself so that proper - * overflow checking is always done. - */ - -#define PyMem_New(type, n) \ - ( ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ - ( (type *) PyMem_Malloc((n) * sizeof(type)) ) ) -#define PyMem_NEW(type, n) \ - ( ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ - ( (type *) PyMem_MALLOC((n) * sizeof(type)) ) ) - -/* - * The value of (p) is always clobbered by this macro regardless of success. - * The caller MUST check if (p) is NULL afterwards and deal with the memory - * error if so. This means the original value of (p) MUST be saved for the - * caller's memory error handler to not lose track of it. - */ -#define PyMem_Resize(p, type, n) \ - ( (p) = ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ - (type *) PyMem_Realloc((p), (n) * sizeof(type)) ) -#define PyMem_RESIZE(p, type, n) \ - ( (p) = ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \ - (type *) PyMem_REALLOC((p), (n) * sizeof(type)) ) - -/* PyMem{Del,DEL} are left over from ancient days, and shouldn't be used - * anymore. They're just confusing aliases for PyMem_{Free,FREE} now. - */ -#define PyMem_Del PyMem_Free -#define PyMem_DEL PyMem_FREE - -#ifdef __cplusplus -} -#endif - -#endif /* !Py_PYMEM_H */
--- a/test/include/python2.7/pyport.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,941 +0,0 @@ -#ifndef Py_PYPORT_H -#define Py_PYPORT_H - -#include "pyconfig.h" /* include for defines */ - -/* Some versions of HP-UX & Solaris need inttypes.h for int32_t, - INT32_MAX, etc. */ -#ifdef HAVE_INTTYPES_H -#include <inttypes.h> -#endif - -#ifdef HAVE_STDINT_H -#include <stdint.h> -#endif - -/************************************************************************** -Symbols and macros to supply platform-independent interfaces to basic -C language & library operations whose spellings vary across platforms. - -Please try to make documentation here as clear as possible: by definition, -the stuff here is trying to illuminate C's darkest corners. - -Config #defines referenced here: - -SIGNED_RIGHT_SHIFT_ZERO_FILLS -Meaning: To be defined iff i>>j does not extend the sign bit when i is a - signed integral type and i < 0. -Used in: Py_ARITHMETIC_RIGHT_SHIFT - -Py_DEBUG -Meaning: Extra checks compiled in for debug mode. -Used in: Py_SAFE_DOWNCAST - -HAVE_UINTPTR_T -Meaning: The C9X type uintptr_t is supported by the compiler -Used in: Py_uintptr_t - -HAVE_LONG_LONG -Meaning: The compiler supports the C type "long long" -Used in: PY_LONG_LONG - -**************************************************************************/ - - -/* For backward compatibility only. Obsolete, do not use. */ -#ifdef HAVE_PROTOTYPES -#define Py_PROTO(x) x -#else -#define Py_PROTO(x) () -#endif -#ifndef Py_FPROTO -#define Py_FPROTO(x) Py_PROTO(x) -#endif - -/* typedefs for some C9X-defined synonyms for integral types. - * - * The names in Python are exactly the same as the C9X names, except with a - * Py_ prefix. Until C9X is universally implemented, this is the only way - * to ensure that Python gets reliable names that don't conflict with names - * in non-Python code that are playing their own tricks to define the C9X - * names. - * - * NOTE: don't go nuts here! Python has no use for *most* of the C9X - * integral synonyms. Only define the ones we actually need. - */ - -#ifdef HAVE_LONG_LONG -#ifndef PY_LONG_LONG -#define PY_LONG_LONG long long -#if defined(LLONG_MAX) -/* If LLONG_MAX is defined in limits.h, use that. */ -#define PY_LLONG_MIN LLONG_MIN -#define PY_LLONG_MAX LLONG_MAX -#define PY_ULLONG_MAX ULLONG_MAX -#elif defined(__LONG_LONG_MAX__) -/* Otherwise, if GCC has a builtin define, use that. */ -#define PY_LLONG_MAX __LONG_LONG_MAX__ -#define PY_LLONG_MIN (-PY_LLONG_MAX-1) -#define PY_ULLONG_MAX (__LONG_LONG_MAX__*2ULL + 1ULL) -#else -/* Otherwise, rely on two's complement. */ -#define PY_ULLONG_MAX (~0ULL) -#define PY_LLONG_MAX ((long long)(PY_ULLONG_MAX>>1)) -#define PY_LLONG_MIN (-PY_LLONG_MAX-1) -#endif /* LLONG_MAX */ -#endif -#endif /* HAVE_LONG_LONG */ - -/* a build with 30-bit digits for Python long integers needs an exact-width - * 32-bit unsigned integer type to store those digits. (We could just use - * type 'unsigned long', but that would be wasteful on a system where longs - * are 64-bits.) On Unix systems, the autoconf macro AC_TYPE_UINT32_T defines - * uint32_t to be such a type unless stdint.h or inttypes.h defines uint32_t. - * However, it doesn't set HAVE_UINT32_T, so we do that here. - */ -#ifdef uint32_t -#define HAVE_UINT32_T 1 -#endif - -#ifdef HAVE_UINT32_T -#ifndef PY_UINT32_T -#define PY_UINT32_T uint32_t -#endif -#endif - -/* Macros for a 64-bit unsigned integer type; used for type 'twodigits' in the - * long integer implementation, when 30-bit digits are enabled. - */ -#ifdef uint64_t -#define HAVE_UINT64_T 1 -#endif - -#ifdef HAVE_UINT64_T -#ifndef PY_UINT64_T -#define PY_UINT64_T uint64_t -#endif -#endif - -/* Signed variants of the above */ -#ifdef int32_t -#define HAVE_INT32_T 1 -#endif - -#ifdef HAVE_INT32_T -#ifndef PY_INT32_T -#define PY_INT32_T int32_t -#endif -#endif - -#ifdef int64_t -#define HAVE_INT64_T 1 -#endif - -#ifdef HAVE_INT64_T -#ifndef PY_INT64_T -#define PY_INT64_T int64_t -#endif -#endif - -/* If PYLONG_BITS_IN_DIGIT is not defined then we'll use 30-bit digits if all - the necessary integer types are available, and we're on a 64-bit platform - (as determined by SIZEOF_VOID_P); otherwise we use 15-bit digits. */ - -#ifndef PYLONG_BITS_IN_DIGIT -#if (defined HAVE_UINT64_T && defined HAVE_INT64_T && \ - defined HAVE_UINT32_T && defined HAVE_INT32_T && SIZEOF_VOID_P >= 8) -#define PYLONG_BITS_IN_DIGIT 30 -#else -#define PYLONG_BITS_IN_DIGIT 15 -#endif -#endif - -/* uintptr_t is the C9X name for an unsigned integral type such that a - * legitimate void* can be cast to uintptr_t and then back to void* again - * without loss of information. Similarly for intptr_t, wrt a signed - * integral type. - */ -#ifdef HAVE_UINTPTR_T -typedef uintptr_t Py_uintptr_t; -typedef intptr_t Py_intptr_t; - -#elif SIZEOF_VOID_P <= SIZEOF_INT -typedef unsigned int Py_uintptr_t; -typedef int Py_intptr_t; - -#elif SIZEOF_VOID_P <= SIZEOF_LONG -typedef unsigned long Py_uintptr_t; -typedef long Py_intptr_t; - -#elif defined(HAVE_LONG_LONG) && (SIZEOF_VOID_P <= SIZEOF_LONG_LONG) -typedef unsigned PY_LONG_LONG Py_uintptr_t; -typedef PY_LONG_LONG Py_intptr_t; - -#else -# error "Python needs a typedef for Py_uintptr_t in pyport.h." -#endif /* HAVE_UINTPTR_T */ - -/* Py_ssize_t is a signed integral type such that sizeof(Py_ssize_t) == - * sizeof(size_t). C99 doesn't define such a thing directly (size_t is an - * unsigned integral type). See PEP 353 for details. - */ -#ifdef HAVE_SSIZE_T -typedef ssize_t Py_ssize_t; -#elif SIZEOF_VOID_P == SIZEOF_SIZE_T -typedef Py_intptr_t Py_ssize_t; -#else -# error "Python needs a typedef for Py_ssize_t in pyport.h." -#endif - -/* Largest possible value of size_t. - SIZE_MAX is part of C99, so it might be defined on some - platforms. If it is not defined, (size_t)-1 is a portable - definition for C89, due to the way signed->unsigned - conversion is defined. */ -#ifdef SIZE_MAX -#define PY_SIZE_MAX SIZE_MAX -#else -#define PY_SIZE_MAX ((size_t)-1) -#endif - -/* Largest positive value of type Py_ssize_t. */ -#define PY_SSIZE_T_MAX ((Py_ssize_t)(((size_t)-1)>>1)) -/* Smallest negative value of type Py_ssize_t. */ -#define PY_SSIZE_T_MIN (-PY_SSIZE_T_MAX-1) - -#if SIZEOF_PID_T > SIZEOF_LONG -# error "Python doesn't support sizeof(pid_t) > sizeof(long)" -#endif - -/* PY_FORMAT_SIZE_T is a platform-specific modifier for use in a printf - * format to convert an argument with the width of a size_t or Py_ssize_t. - * C99 introduced "z" for this purpose, but not all platforms support that; - * e.g., MS compilers use "I" instead. - * - * These "high level" Python format functions interpret "z" correctly on - * all platforms (Python interprets the format string itself, and does whatever - * the platform C requires to convert a size_t/Py_ssize_t argument): - * - * PyString_FromFormat - * PyErr_Format - * PyString_FromFormatV - * - * Lower-level uses require that you interpolate the correct format modifier - * yourself (e.g., calling printf, fprintf, sprintf, PyOS_snprintf); for - * example, - * - * Py_ssize_t index; - * fprintf(stderr, "index %" PY_FORMAT_SIZE_T "d sucks\n", index); - * - * That will expand to %ld, or %Id, or to something else correct for a - * Py_ssize_t on the platform. - */ -#ifndef PY_FORMAT_SIZE_T -# if SIZEOF_SIZE_T == SIZEOF_INT && !defined(__APPLE__) -# define PY_FORMAT_SIZE_T "" -# elif SIZEOF_SIZE_T == SIZEOF_LONG -# define PY_FORMAT_SIZE_T "l" -# elif defined(MS_WINDOWS) -# define PY_FORMAT_SIZE_T "I" -# else -# error "This platform's pyconfig.h needs to define PY_FORMAT_SIZE_T" -# endif -#endif - -/* PY_FORMAT_LONG_LONG is analogous to PY_FORMAT_SIZE_T above, but for - * the long long type instead of the size_t type. It's only available - * when HAVE_LONG_LONG is defined. The "high level" Python format - * functions listed above will interpret "lld" or "llu" correctly on - * all platforms. - */ -#ifdef HAVE_LONG_LONG -# ifndef PY_FORMAT_LONG_LONG -# if defined(MS_WIN64) || defined(MS_WINDOWS) -# define PY_FORMAT_LONG_LONG "I64" -# else -# error "This platform's pyconfig.h needs to define PY_FORMAT_LONG_LONG" -# endif -# endif -#endif - -/* Py_LOCAL can be used instead of static to get the fastest possible calling - * convention for functions that are local to a given module. - * - * Py_LOCAL_INLINE does the same thing, and also explicitly requests inlining, - * for platforms that support that. - * - * If PY_LOCAL_AGGRESSIVE is defined before python.h is included, more - * "aggressive" inlining/optimizaion is enabled for the entire module. This - * may lead to code bloat, and may slow things down for those reasons. It may - * also lead to errors, if the code relies on pointer aliasing. Use with - * care. - * - * NOTE: You can only use this for functions that are entirely local to a - * module; functions that are exported via method tables, callbacks, etc, - * should keep using static. - */ - -#undef USE_INLINE /* XXX - set via configure? */ - -#if defined(_MSC_VER) -#if defined(PY_LOCAL_AGGRESSIVE) -/* enable more aggressive optimization for visual studio */ -#pragma optimize("agtw", on) -#endif -/* ignore warnings if the compiler decides not to inline a function */ -#pragma warning(disable: 4710) -/* fastest possible local call under MSVC */ -#define Py_LOCAL(type) static type __fastcall -#define Py_LOCAL_INLINE(type) static __inline type __fastcall -#elif defined(USE_INLINE) -#define Py_LOCAL(type) static type -#define Py_LOCAL_INLINE(type) static inline type -#else -#define Py_LOCAL(type) static type -#define Py_LOCAL_INLINE(type) static type -#endif - -/* Py_MEMCPY can be used instead of memcpy in cases where the copied blocks - * are often very short. While most platforms have highly optimized code for - * large transfers, the setup costs for memcpy are often quite high. MEMCPY - * solves this by doing short copies "in line". - */ - -#if defined(_MSC_VER) -#define Py_MEMCPY(target, source, length) do { \ - size_t i_, n_ = (length); \ - char *t_ = (void*) (target); \ - const char *s_ = (void*) (source); \ - if (n_ >= 16) \ - memcpy(t_, s_, n_); \ - else \ - for (i_ = 0; i_ < n_; i_++) \ - t_[i_] = s_[i_]; \ - } while (0) -#else -#define Py_MEMCPY memcpy -#endif - -#include <stdlib.h> - -#ifdef HAVE_IEEEFP_H -#include <ieeefp.h> /* needed for 'finite' declaration on some platforms */ -#endif - -#include <math.h> /* Moved here from the math section, before extern "C" */ - -/******************************************** - * WRAPPER FOR <time.h> and/or <sys/time.h> * - ********************************************/ - -#ifdef TIME_WITH_SYS_TIME -#include <sys/time.h> -#include <time.h> -#else /* !TIME_WITH_SYS_TIME */ -#ifdef HAVE_SYS_TIME_H -#include <sys/time.h> -#else /* !HAVE_SYS_TIME_H */ -#include <time.h> -#endif /* !HAVE_SYS_TIME_H */ -#endif /* !TIME_WITH_SYS_TIME */ - - -/****************************** - * WRAPPER FOR <sys/select.h> * - ******************************/ - -/* NB caller must include <sys/types.h> */ - -#ifdef HAVE_SYS_SELECT_H - -#include <sys/select.h> - -#endif /* !HAVE_SYS_SELECT_H */ - -/******************************* - * stat() and fstat() fiddling * - *******************************/ - -/* We expect that stat and fstat exist on most systems. - * It's confirmed on Unix, Mac and Windows. - * If you don't have them, add - * #define DONT_HAVE_STAT - * and/or - * #define DONT_HAVE_FSTAT - * to your pyconfig.h. Python code beyond this should check HAVE_STAT and - * HAVE_FSTAT instead. - * Also - * #define HAVE_SYS_STAT_H - * if <sys/stat.h> exists on your platform, and - * #define HAVE_STAT_H - * if <stat.h> does. - */ -#ifndef DONT_HAVE_STAT -#define HAVE_STAT -#endif - -#ifndef DONT_HAVE_FSTAT -#define HAVE_FSTAT -#endif - -#ifdef RISCOS -#include <sys/types.h> -#include "unixstuff.h" -#endif - -#ifdef HAVE_SYS_STAT_H -#if defined(PYOS_OS2) && defined(PYCC_GCC) -#include <sys/types.h> -#endif -#include <sys/stat.h> -#elif defined(HAVE_STAT_H) -#include <stat.h> -#endif - -#if defined(PYCC_VACPP) -/* VisualAge C/C++ Failed to Define MountType Field in sys/stat.h */ -#define S_IFMT (S_IFDIR|S_IFCHR|S_IFREG) -#endif - -#ifndef S_ISREG -#define S_ISREG(x) (((x) & S_IFMT) == S_IFREG) -#endif - -#ifndef S_ISDIR -#define S_ISDIR(x) (((x) & S_IFMT) == S_IFDIR) -#endif - - -#ifdef __cplusplus -/* Move this down here since some C++ #include's don't like to be included - inside an extern "C" */ -extern "C" { -#endif - - -/* Py_ARITHMETIC_RIGHT_SHIFT - * C doesn't define whether a right-shift of a signed integer sign-extends - * or zero-fills. Here a macro to force sign extension: - * Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) - * Return I >> J, forcing sign extension. Arithmetically, return the - * floor of I/2**J. - * Requirements: - * I should have signed integer type. In the terminology of C99, this can - * be either one of the five standard signed integer types (signed char, - * short, int, long, long long) or an extended signed integer type. - * J is an integer >= 0 and strictly less than the number of bits in the - * type of I (because C doesn't define what happens for J outside that - * range either). - * TYPE used to specify the type of I, but is now ignored. It's been left - * in for backwards compatibility with versions <= 2.6 or 3.0. - * Caution: - * I may be evaluated more than once. - */ -#ifdef SIGNED_RIGHT_SHIFT_ZERO_FILLS -#define Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) \ - ((I) < 0 ? -1-((-1-(I)) >> (J)) : (I) >> (J)) -#else -#define Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) ((I) >> (J)) -#endif - -/* Py_FORCE_EXPANSION(X) - * "Simply" returns its argument. However, macro expansions within the - * argument are evaluated. This unfortunate trickery is needed to get - * token-pasting to work as desired in some cases. - */ -#define Py_FORCE_EXPANSION(X) X - -/* Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) - * Cast VALUE to type NARROW from type WIDE. In Py_DEBUG mode, this - * assert-fails if any information is lost. - * Caution: - * VALUE may be evaluated more than once. - */ -#ifdef Py_DEBUG -#define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) \ - (assert((WIDE)(NARROW)(VALUE) == (VALUE)), (NARROW)(VALUE)) -#else -#define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) (NARROW)(VALUE) -#endif - -/* Py_SET_ERRNO_ON_MATH_ERROR(x) - * If a libm function did not set errno, but it looks like the result - * overflowed or not-a-number, set errno to ERANGE or EDOM. Set errno - * to 0 before calling a libm function, and invoke this macro after, - * passing the function result. - * Caution: - * This isn't reliable. See Py_OVERFLOWED comments. - * X is evaluated more than once. - */ -#if defined(__FreeBSD__) || defined(__OpenBSD__) || (defined(__hpux) && defined(__ia64)) -#define _Py_SET_EDOM_FOR_NAN(X) if (isnan(X)) errno = EDOM; -#else -#define _Py_SET_EDOM_FOR_NAN(X) ; -#endif -#define Py_SET_ERRNO_ON_MATH_ERROR(X) \ - do { \ - if (errno == 0) { \ - if ((X) == Py_HUGE_VAL || (X) == -Py_HUGE_VAL) \ - errno = ERANGE; \ - else _Py_SET_EDOM_FOR_NAN(X) \ - } \ - } while(0) - -/* Py_SET_ERANGE_ON_OVERFLOW(x) - * An alias of Py_SET_ERRNO_ON_MATH_ERROR for backward-compatibility. - */ -#define Py_SET_ERANGE_IF_OVERFLOW(X) Py_SET_ERRNO_ON_MATH_ERROR(X) - -/* Py_ADJUST_ERANGE1(x) - * Py_ADJUST_ERANGE2(x, y) - * Set errno to 0 before calling a libm function, and invoke one of these - * macros after, passing the function result(s) (Py_ADJUST_ERANGE2 is useful - * for functions returning complex results). This makes two kinds of - * adjustments to errno: (A) If it looks like the platform libm set - * errno=ERANGE due to underflow, clear errno. (B) If it looks like the - * platform libm overflowed but didn't set errno, force errno to ERANGE. In - * effect, we're trying to force a useful implementation of C89 errno - * behavior. - * Caution: - * This isn't reliable. See Py_OVERFLOWED comments. - * X and Y may be evaluated more than once. - */ -#define Py_ADJUST_ERANGE1(X) \ - do { \ - if (errno == 0) { \ - if ((X) == Py_HUGE_VAL || (X) == -Py_HUGE_VAL) \ - errno = ERANGE; \ - } \ - else if (errno == ERANGE && (X) == 0.0) \ - errno = 0; \ - } while(0) - -#define Py_ADJUST_ERANGE2(X, Y) \ - do { \ - if ((X) == Py_HUGE_VAL || (X) == -Py_HUGE_VAL || \ - (Y) == Py_HUGE_VAL || (Y) == -Py_HUGE_VAL) { \ - if (errno == 0) \ - errno = ERANGE; \ - } \ - else if (errno == ERANGE) \ - errno = 0; \ - } while(0) - -/* The functions _Py_dg_strtod and _Py_dg_dtoa in Python/dtoa.c (which are - * required to support the short float repr introduced in Python 3.1) require - * that the floating-point unit that's being used for arithmetic operations - * on C doubles is set to use 53-bit precision. It also requires that the - * FPU rounding mode is round-half-to-even, but that's less often an issue. - * - * If your FPU isn't already set to 53-bit precision/round-half-to-even, and - * you want to make use of _Py_dg_strtod and _Py_dg_dtoa, then you should - * - * #define HAVE_PY_SET_53BIT_PRECISION 1 - * - * and also give appropriate definitions for the following three macros: - * - * _PY_SET_53BIT_PRECISION_START : store original FPU settings, and - * set FPU to 53-bit precision/round-half-to-even - * _PY_SET_53BIT_PRECISION_END : restore original FPU settings - * _PY_SET_53BIT_PRECISION_HEADER : any variable declarations needed to - * use the two macros above. - * - * The macros are designed to be used within a single C function: see - * Python/pystrtod.c for an example of their use. - */ - -/* get and set x87 control word for gcc/x86 */ -#ifdef HAVE_GCC_ASM_FOR_X87 -#define HAVE_PY_SET_53BIT_PRECISION 1 -/* _Py_get/set_387controlword functions are defined in Python/pymath.c */ -#define _Py_SET_53BIT_PRECISION_HEADER \ - unsigned short old_387controlword, new_387controlword -#define _Py_SET_53BIT_PRECISION_START \ - do { \ - old_387controlword = _Py_get_387controlword(); \ - new_387controlword = (old_387controlword & ~0x0f00) | 0x0200; \ - if (new_387controlword != old_387controlword) \ - _Py_set_387controlword(new_387controlword); \ - } while (0) -#define _Py_SET_53BIT_PRECISION_END \ - if (new_387controlword != old_387controlword) \ - _Py_set_387controlword(old_387controlword) -#endif - -/* get and set x87 control word for VisualStudio/x86 */ -#if defined(_MSC_VER) && !defined(_WIN64) /* x87 not supported in 64-bit */ -#define HAVE_PY_SET_53BIT_PRECISION 1 -#define _Py_SET_53BIT_PRECISION_HEADER \ - unsigned int old_387controlword, new_387controlword, out_387controlword -/* We use the __control87_2 function to set only the x87 control word. - The SSE control word is unaffected. */ -#define _Py_SET_53BIT_PRECISION_START \ - do { \ - __control87_2(0, 0, &old_387controlword, NULL); \ - new_387controlword = \ - (old_387controlword & ~(_MCW_PC | _MCW_RC)) | (_PC_53 | _RC_NEAR); \ - if (new_387controlword != old_387controlword) \ - __control87_2(new_387controlword, _MCW_PC | _MCW_RC, \ - &out_387controlword, NULL); \ - } while (0) -#define _Py_SET_53BIT_PRECISION_END \ - do { \ - if (new_387controlword != old_387controlword) \ - __control87_2(old_387controlword, _MCW_PC | _MCW_RC, \ - &out_387controlword, NULL); \ - } while (0) -#endif - -/* default definitions are empty */ -#ifndef HAVE_PY_SET_53BIT_PRECISION -#define _Py_SET_53BIT_PRECISION_HEADER -#define _Py_SET_53BIT_PRECISION_START -#define _Py_SET_53BIT_PRECISION_END -#endif - -/* If we can't guarantee 53-bit precision, don't use the code - in Python/dtoa.c, but fall back to standard code. This - means that repr of a float will be long (17 sig digits). - - Realistically, there are two things that could go wrong: - - (1) doubles aren't IEEE 754 doubles, or - (2) we're on x86 with the rounding precision set to 64-bits - (extended precision), and we don't know how to change - the rounding precision. - */ - -#if !defined(DOUBLE_IS_LITTLE_ENDIAN_IEEE754) && \ - !defined(DOUBLE_IS_BIG_ENDIAN_IEEE754) && \ - !defined(DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754) -#define PY_NO_SHORT_FLOAT_REPR -#endif - -/* double rounding is symptomatic of use of extended precision on x86. If - we're seeing double rounding, and we don't have any mechanism available for - changing the FPU rounding precision, then don't use Python/dtoa.c. */ -#if defined(X87_DOUBLE_ROUNDING) && !defined(HAVE_PY_SET_53BIT_PRECISION) -#define PY_NO_SHORT_FLOAT_REPR -#endif - -/* Py_DEPRECATED(version) - * Declare a variable, type, or function deprecated. - * Usage: - * extern int old_var Py_DEPRECATED(2.3); - * typedef int T1 Py_DEPRECATED(2.4); - * extern int x() Py_DEPRECATED(2.5); - */ -#if defined(__GNUC__) && ((__GNUC__ >= 4) || \ - (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)) -#define Py_DEPRECATED(VERSION_UNUSED) __attribute__((__deprecated__)) -#else -#define Py_DEPRECATED(VERSION_UNUSED) -#endif - -/************************************************************************** -Prototypes that are missing from the standard include files on some systems -(and possibly only some versions of such systems.) - -Please be conservative with adding new ones, document them and enclose them -in platform-specific #ifdefs. -**************************************************************************/ - -#ifdef SOLARIS -/* Unchecked */ -extern int gethostname(char *, int); -#endif - -#ifdef __BEOS__ -/* Unchecked */ -/* It's in the libs, but not the headers... - [cjh] */ -int shutdown( int, int ); -#endif - -#ifdef HAVE__GETPTY -#include <sys/types.h> /* we need to import mode_t */ -extern char * _getpty(int *, int, mode_t, int); -#endif - -/* On QNX 6, struct termio must be declared by including sys/termio.h - if TCGETA, TCSETA, TCSETAW, or TCSETAF are used. sys/termio.h must - be included before termios.h or it will generate an error. */ -#if defined(HAVE_SYS_TERMIO_H) && !defined(__hpux) -#include <sys/termio.h> -#endif - -#if defined(HAVE_OPENPTY) || defined(HAVE_FORKPTY) -#if !defined(HAVE_PTY_H) && !defined(HAVE_LIBUTIL_H) && !defined(HAVE_UTIL_H) -/* BSDI does not supply a prototype for the 'openpty' and 'forkpty' - functions, even though they are included in libutil. */ -#include <termios.h> -extern int openpty(int *, int *, char *, struct termios *, struct winsize *); -extern pid_t forkpty(int *, char *, struct termios *, struct winsize *); -#endif /* !defined(HAVE_PTY_H) && !defined(HAVE_LIBUTIL_H) */ -#endif /* defined(HAVE_OPENPTY) || defined(HAVE_FORKPTY) */ - - -/* These are pulled from various places. It isn't obvious on what platforms - they are necessary, nor what the exact prototype should look like (which - is likely to vary between platforms!) If you find you need one of these - declarations, please move them to a platform-specific block and include - proper prototypes. */ -#if 0 - -/* From Modules/resource.c */ -extern int getrusage(); -extern int getpagesize(); - -/* From Python/sysmodule.c and Modules/posixmodule.c */ -extern int fclose(FILE *); - -/* From Modules/posixmodule.c */ -extern int fdatasync(int); -#endif /* 0 */ - - -/* On 4.4BSD-descendants, ctype functions serves the whole range of - * wchar_t character set rather than single byte code points only. - * This characteristic can break some operations of string object - * including str.upper() and str.split() on UTF-8 locales. This - * workaround was provided by Tim Robbins of FreeBSD project. - */ - -#ifdef __FreeBSD__ -#include <osreldate.h> -#if __FreeBSD_version > 500039 -# define _PY_PORT_CTYPE_UTF8_ISSUE -#endif -#endif - - -#if defined(__APPLE__) -# define _PY_PORT_CTYPE_UTF8_ISSUE -#endif - -#ifdef _PY_PORT_CTYPE_UTF8_ISSUE -#include <ctype.h> -#include <wctype.h> -#undef isalnum -#define isalnum(c) iswalnum(btowc(c)) -#undef isalpha -#define isalpha(c) iswalpha(btowc(c)) -#undef islower -#define islower(c) iswlower(btowc(c)) -#undef isspace -#define isspace(c) iswspace(btowc(c)) -#undef isupper -#define isupper(c) iswupper(btowc(c)) -#undef tolower -#define tolower(c) towlower(btowc(c)) -#undef toupper -#define toupper(c) towupper(btowc(c)) -#endif - - -/* Declarations for symbol visibility. - - PyAPI_FUNC(type): Declares a public Python API function and return type - PyAPI_DATA(type): Declares public Python data and its type - PyMODINIT_FUNC: A Python module init function. If these functions are - inside the Python core, they are private to the core. - If in an extension module, it may be declared with - external linkage depending on the platform. - - As a number of platforms support/require "__declspec(dllimport/dllexport)", - we support a HAVE_DECLSPEC_DLL macro to save duplication. -*/ - -/* - All windows ports, except cygwin, are handled in PC/pyconfig.h. - - BeOS and cygwin are the only other autoconf platform requiring special - linkage handling and both of these use __declspec(). -*/ -#if defined(__CYGWIN__) || defined(__BEOS__) -# define HAVE_DECLSPEC_DLL -#endif - -/* only get special linkage if built as shared or platform is Cygwin */ -#if defined(Py_ENABLE_SHARED) || defined(__CYGWIN__) -# if defined(HAVE_DECLSPEC_DLL) -# ifdef Py_BUILD_CORE -# define PyAPI_FUNC(RTYPE) __declspec(dllexport) RTYPE -# define PyAPI_DATA(RTYPE) extern __declspec(dllexport) RTYPE - /* module init functions inside the core need no external linkage */ - /* except for Cygwin to handle embedding (FIXME: BeOS too?) */ -# if defined(__CYGWIN__) -# define PyMODINIT_FUNC __declspec(dllexport) void -# else /* __CYGWIN__ */ -# define PyMODINIT_FUNC void -# endif /* __CYGWIN__ */ -# else /* Py_BUILD_CORE */ - /* Building an extension module, or an embedded situation */ - /* public Python functions and data are imported */ - /* Under Cygwin, auto-import functions to prevent compilation */ - /* failures similar to those described at the bottom of 4.1: */ - /* http://docs.python.org/extending/windows.html#a-cookbook-approach */ -# if !defined(__CYGWIN__) -# define PyAPI_FUNC(RTYPE) __declspec(dllimport) RTYPE -# endif /* !__CYGWIN__ */ -# define PyAPI_DATA(RTYPE) extern __declspec(dllimport) RTYPE - /* module init functions outside the core must be exported */ -# if defined(__cplusplus) -# define PyMODINIT_FUNC extern "C" __declspec(dllexport) void -# else /* __cplusplus */ -# define PyMODINIT_FUNC __declspec(dllexport) void -# endif /* __cplusplus */ -# endif /* Py_BUILD_CORE */ -# endif /* HAVE_DECLSPEC */ -#endif /* Py_ENABLE_SHARED */ - -/* If no external linkage macros defined by now, create defaults */ -#ifndef PyAPI_FUNC -# define PyAPI_FUNC(RTYPE) RTYPE -#endif -#ifndef PyAPI_DATA -# define PyAPI_DATA(RTYPE) extern RTYPE -#endif -#ifndef PyMODINIT_FUNC -# if defined(__cplusplus) -# define PyMODINIT_FUNC extern "C" void -# else /* __cplusplus */ -# define PyMODINIT_FUNC void -# endif /* __cplusplus */ -#endif - -/* Deprecated DL_IMPORT and DL_EXPORT macros */ -#if defined(Py_ENABLE_SHARED) && defined (HAVE_DECLSPEC_DLL) -# if defined(Py_BUILD_CORE) -# define DL_IMPORT(RTYPE) __declspec(dllexport) RTYPE -# define DL_EXPORT(RTYPE) __declspec(dllexport) RTYPE -# else -# define DL_IMPORT(RTYPE) __declspec(dllimport) RTYPE -# define DL_EXPORT(RTYPE) __declspec(dllexport) RTYPE -# endif -#endif -#ifndef DL_EXPORT -# define DL_EXPORT(RTYPE) RTYPE -#endif -#ifndef DL_IMPORT -# define DL_IMPORT(RTYPE) RTYPE -#endif -/* End of deprecated DL_* macros */ - -/* If the fd manipulation macros aren't defined, - here is a set that should do the job */ - -#if 0 /* disabled and probably obsolete */ - -#ifndef FD_SETSIZE -#define FD_SETSIZE 256 -#endif - -#ifndef FD_SET - -typedef long fd_mask; - -#define NFDBITS (sizeof(fd_mask) * NBBY) /* bits per mask */ -#ifndef howmany -#define howmany(x, y) (((x)+((y)-1))/(y)) -#endif /* howmany */ - -typedef struct fd_set { - fd_mask fds_bits[howmany(FD_SETSIZE, NFDBITS)]; -} fd_set; - -#define FD_SET(n, p) ((p)->fds_bits[(n)/NFDBITS] |= (1 << ((n) % NFDBITS))) -#define FD_CLR(n, p) ((p)->fds_bits[(n)/NFDBITS] &= ~(1 << ((n) % NFDBITS))) -#define FD_ISSET(n, p) ((p)->fds_bits[(n)/NFDBITS] & (1 << ((n) % NFDBITS))) -#define FD_ZERO(p) memset((char *)(p), '\0', sizeof(*(p))) - -#endif /* FD_SET */ - -#endif /* fd manipulation macros */ - - -/* limits.h constants that may be missing */ - -#ifndef INT_MAX -#define INT_MAX 2147483647 -#endif - -#ifndef LONG_MAX -#if SIZEOF_LONG == 4 -#define LONG_MAX 0X7FFFFFFFL -#elif SIZEOF_LONG == 8 -#define LONG_MAX 0X7FFFFFFFFFFFFFFFL -#else -#error "could not set LONG_MAX in pyport.h" -#endif -#endif - -#ifndef LONG_MIN -#define LONG_MIN (-LONG_MAX-1) -#endif - -#ifndef LONG_BIT -#define LONG_BIT (8 * SIZEOF_LONG) -#endif - -#if LONG_BIT != 8 * SIZEOF_LONG -/* 04-Oct-2000 LONG_BIT is apparently (mis)defined as 64 on some recent - * 32-bit platforms using gcc. We try to catch that here at compile-time - * rather than waiting for integer multiplication to trigger bogus - * overflows. - */ -#error "LONG_BIT definition appears wrong for platform (bad gcc/glibc config?)." -#endif - -#ifdef __cplusplus -} -#endif - -/* - * Hide GCC attributes from compilers that don't support them. - */ -#if (!defined(__GNUC__) || __GNUC__ < 2 || \ - (__GNUC__ == 2 && __GNUC_MINOR__ < 7) ) && \ - !defined(RISCOS) -#define Py_GCC_ATTRIBUTE(x) -#else -#define Py_GCC_ATTRIBUTE(x) __attribute__(x) -#endif - -/* - * Add PyArg_ParseTuple format where available. - */ -#ifdef HAVE_ATTRIBUTE_FORMAT_PARSETUPLE -#define Py_FORMAT_PARSETUPLE(func,p1,p2) __attribute__((format(func,p1,p2))) -#else -#define Py_FORMAT_PARSETUPLE(func,p1,p2) -#endif - -/* - * Specify alignment on compilers that support it. - */ -#if defined(__GNUC__) && __GNUC__ >= 3 -#define Py_ALIGNED(x) __attribute__((aligned(x))) -#else -#define Py_ALIGNED(x) -#endif - -/* Eliminate end-of-loop code not reached warnings from SunPro C - * when using do{...}while(0) macros - */ -#ifdef __SUNPRO_C -#pragma error_messages (off,E_END_OF_LOOP_CODE_NOT_REACHED) -#endif - -/* - * Older Microsoft compilers don't support the C99 long long literal suffixes, - * so these will be defined in PC/pyconfig.h for those compilers. - */ -#ifndef Py_LL -#define Py_LL(x) x##LL -#endif - -#ifndef Py_ULL -#define Py_ULL(x) Py_LL(x##U) -#endif - -#endif /* Py_PYPORT_H */
--- a/test/include/python2.7/pystate.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,200 +0,0 @@ - -/* Thread and interpreter state structures and their interfaces */ - - -#ifndef Py_PYSTATE_H -#define Py_PYSTATE_H -#ifdef __cplusplus -extern "C" { -#endif - -/* State shared between threads */ - -struct _ts; /* Forward */ -struct _is; /* Forward */ - -typedef struct _is { - - struct _is *next; - struct _ts *tstate_head; - - PyObject *modules; - PyObject *sysdict; - PyObject *builtins; - PyObject *modules_reloading; - - PyObject *codec_search_path; - PyObject *codec_search_cache; - PyObject *codec_error_registry; - -#ifdef HAVE_DLOPEN - int dlopenflags; -#endif -#ifdef WITH_TSC - int tscdump; -#endif - -} PyInterpreterState; - - -/* State unique per thread */ - -struct _frame; /* Avoid including frameobject.h */ - -/* Py_tracefunc return -1 when raising an exception, or 0 for success. */ -typedef int (*Py_tracefunc)(PyObject *, struct _frame *, int, PyObject *); - -/* The following values are used for 'what' for tracefunc functions: */ -#define PyTrace_CALL 0 -#define PyTrace_EXCEPTION 1 -#define PyTrace_LINE 2 -#define PyTrace_RETURN 3 -#define PyTrace_C_CALL 4 -#define PyTrace_C_EXCEPTION 5 -#define PyTrace_C_RETURN 6 - -typedef struct _ts { - /* See Python/ceval.c for comments explaining most fields */ - - struct _ts *next; - PyInterpreterState *interp; - - struct _frame *frame; - int recursion_depth; - /* 'tracing' keeps track of the execution depth when tracing/profiling. - This is to prevent the actual trace/profile code from being recorded in - the trace/profile. */ - int tracing; - int use_tracing; - - Py_tracefunc c_profilefunc; - Py_tracefunc c_tracefunc; - PyObject *c_profileobj; - PyObject *c_traceobj; - - PyObject *curexc_type; - PyObject *curexc_value; - PyObject *curexc_traceback; - - PyObject *exc_type; - PyObject *exc_value; - PyObject *exc_traceback; - - PyObject *dict; /* Stores per-thread state */ - - /* tick_counter is incremented whenever the check_interval ticker - * reaches zero. The purpose is to give a useful measure of the number - * of interpreted bytecode instructions in a given thread. This - * extremely lightweight statistic collector may be of interest to - * profilers (like psyco.jit()), although nothing in the core uses it. - */ - int tick_counter; - - int gilstate_counter; - - PyObject *async_exc; /* Asynchronous exception to raise */ - long thread_id; /* Thread id where this tstate was created */ - - int trash_delete_nesting; - PyObject *trash_delete_later; - - /* XXX signal handlers should also be here */ - -} PyThreadState; - - -PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void); -PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *); -PyAPI_FUNC(void) PyInterpreterState_Delete(PyInterpreterState *); - -PyAPI_FUNC(PyThreadState *) PyThreadState_New(PyInterpreterState *); -PyAPI_FUNC(PyThreadState *) _PyThreadState_Prealloc(PyInterpreterState *); -PyAPI_FUNC(void) _PyThreadState_Init(PyThreadState *); -PyAPI_FUNC(void) PyThreadState_Clear(PyThreadState *); -PyAPI_FUNC(void) PyThreadState_Delete(PyThreadState *); -#ifdef WITH_THREAD -PyAPI_FUNC(void) PyThreadState_DeleteCurrent(void); -#endif - -PyAPI_FUNC(PyThreadState *) PyThreadState_Get(void); -PyAPI_FUNC(PyThreadState *) PyThreadState_Swap(PyThreadState *); -PyAPI_FUNC(PyObject *) PyThreadState_GetDict(void); -PyAPI_FUNC(int) PyThreadState_SetAsyncExc(long, PyObject *); - - -/* Variable and macro for in-line access to current thread state */ - -PyAPI_DATA(PyThreadState *) _PyThreadState_Current; - -#ifdef Py_DEBUG -#define PyThreadState_GET() PyThreadState_Get() -#else -#define PyThreadState_GET() (_PyThreadState_Current) -#endif - -typedef - enum {PyGILState_LOCKED, PyGILState_UNLOCKED} - PyGILState_STATE; - -/* Ensure that the current thread is ready to call the Python - C API, regardless of the current state of Python, or of its - thread lock. This may be called as many times as desired - by a thread so long as each call is matched with a call to - PyGILState_Release(). In general, other thread-state APIs may - be used between _Ensure() and _Release() calls, so long as the - thread-state is restored to its previous state before the Release(). - For example, normal use of the Py_BEGIN_ALLOW_THREADS/ - Py_END_ALLOW_THREADS macros are acceptable. - - The return value is an opaque "handle" to the thread state when - PyGILState_Ensure() was called, and must be passed to - PyGILState_Release() to ensure Python is left in the same state. Even - though recursive calls are allowed, these handles can *not* be shared - - each unique call to PyGILState_Ensure must save the handle for its - call to PyGILState_Release. - - When the function returns, the current thread will hold the GIL. - - Failure is a fatal error. -*/ -PyAPI_FUNC(PyGILState_STATE) PyGILState_Ensure(void); - -/* Release any resources previously acquired. After this call, Python's - state will be the same as it was prior to the corresponding - PyGILState_Ensure() call (but generally this state will be unknown to - the caller, hence the use of the GILState API.) - - Every call to PyGILState_Ensure must be matched by a call to - PyGILState_Release on the same thread. -*/ -PyAPI_FUNC(void) PyGILState_Release(PyGILState_STATE); - -/* Helper/diagnostic function - get the current thread state for - this thread. May return NULL if no GILState API has been used - on the current thread. Note that the main thread always has such a - thread-state, even if no auto-thread-state call has been made - on the main thread. -*/ -PyAPI_FUNC(PyThreadState *) PyGILState_GetThisThreadState(void); - -/* The implementation of sys._current_frames() Returns a dict mapping - thread id to that thread's current frame. -*/ -PyAPI_FUNC(PyObject *) _PyThread_CurrentFrames(void); - -/* Routines for advanced debuggers, requested by David Beazley. - Don't use unless you know what you are doing! */ -PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Head(void); -PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Next(PyInterpreterState *); -PyAPI_FUNC(PyThreadState *) PyInterpreterState_ThreadHead(PyInterpreterState *); -PyAPI_FUNC(PyThreadState *) PyThreadState_Next(PyThreadState *); - -typedef struct _frame *(*PyThreadFrameGetter)(PyThreadState *self_); - -/* hook for PyEval_GetFrame(), requested for Psyco */ -PyAPI_DATA(PyThreadFrameGetter) _PyThreadState_GetFrame; - -#ifdef __cplusplus -} -#endif -#endif /* !Py_PYSTATE_H */
--- a/test/include/python2.7/pystrcmp.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -#ifndef Py_STRCMP_H -#define Py_STRCMP_H - -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_FUNC(int) PyOS_mystrnicmp(const char *, const char *, Py_ssize_t); -PyAPI_FUNC(int) PyOS_mystricmp(const char *, const char *); - -#if defined(MS_WINDOWS) || defined(PYOS_OS2) -#define PyOS_strnicmp strnicmp -#define PyOS_stricmp stricmp -#else -#define PyOS_strnicmp PyOS_mystrnicmp -#define PyOS_stricmp PyOS_mystricmp -#endif - -#ifdef __cplusplus -} -#endif - -#endif /* !Py_STRCMP_H */
--- a/test/include/python2.7/pystrtod.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -#ifndef Py_STRTOD_H -#define Py_STRTOD_H - -#ifdef __cplusplus -extern "C" { -#endif - - -PyAPI_FUNC(double) PyOS_ascii_strtod(const char *str, char **ptr); -PyAPI_FUNC(double) PyOS_ascii_atof(const char *str); - -/* Deprecated in 2.7 and 3.1. Will disappear in 2.8 (if it exists) and 3.2 */ -PyAPI_FUNC(char *) PyOS_ascii_formatd(char *buffer, size_t buf_len, - const char *format, double d); -PyAPI_FUNC(double) PyOS_string_to_double(const char *str, - char **endptr, - PyObject *overflow_exception); - -/* The caller is responsible for calling PyMem_Free to free the buffer - that's is returned. */ -PyAPI_FUNC(char *) PyOS_double_to_string(double val, - char format_code, - int precision, - int flags, - int *type); - -PyAPI_FUNC(double) _Py_parse_inf_or_nan(const char *p, char **endptr); - - -/* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */ -#define Py_DTSF_SIGN 0x01 /* always add the sign */ -#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */ -#define Py_DTSF_ALT 0x04 /* "alternate" formatting. it's format_code - specific */ - -/* PyOS_double_to_string's "type", if non-NULL, will be set to one of: */ -#define Py_DTST_FINITE 0 -#define Py_DTST_INFINITE 1 -#define Py_DTST_NAN 2 - -#ifdef __cplusplus -} -#endif - -#endif /* !Py_STRTOD_H */
--- a/test/include/python2.7/pythonrun.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,182 +0,0 @@ - -/* Interfaces to parse and execute pieces of python code */ - -#ifndef Py_PYTHONRUN_H -#define Py_PYTHONRUN_H -#ifdef __cplusplus -extern "C" { -#endif - -#define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | \ - CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | \ - CO_FUTURE_UNICODE_LITERALS) -#define PyCF_MASK_OBSOLETE (CO_NESTED) -#define PyCF_SOURCE_IS_UTF8 0x0100 -#define PyCF_DONT_IMPLY_DEDENT 0x0200 -#define PyCF_ONLY_AST 0x0400 - -typedef struct { - int cf_flags; /* bitmask of CO_xxx flags relevant to future */ -} PyCompilerFlags; - -PyAPI_FUNC(void) Py_SetProgramName(char *); -PyAPI_FUNC(char *) Py_GetProgramName(void); - -PyAPI_FUNC(void) Py_SetPythonHome(char *); -PyAPI_FUNC(char *) Py_GetPythonHome(void); - -PyAPI_FUNC(void) Py_Initialize(void); -PyAPI_FUNC(void) Py_InitializeEx(int); -PyAPI_FUNC(void) Py_Finalize(void); -PyAPI_FUNC(int) Py_IsInitialized(void); -PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void); -PyAPI_FUNC(void) Py_EndInterpreter(PyThreadState *); - -PyAPI_FUNC(int) PyRun_AnyFileFlags(FILE *, const char *, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_AnyFileExFlags(FILE *, const char *, int, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_SimpleStringFlags(const char *, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_SimpleFileExFlags(FILE *, const char *, int, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_InteractiveOneFlags(FILE *, const char *, PyCompilerFlags *); -PyAPI_FUNC(int) PyRun_InteractiveLoopFlags(FILE *, const char *, PyCompilerFlags *); - -PyAPI_FUNC(struct _mod *) PyParser_ASTFromString(const char *, const char *, - int, PyCompilerFlags *flags, - PyArena *); -PyAPI_FUNC(struct _mod *) PyParser_ASTFromFile(FILE *, const char *, int, - char *, char *, - PyCompilerFlags *, int *, - PyArena *); -#define PyParser_SimpleParseString(S, B) \ - PyParser_SimpleParseStringFlags(S, B, 0) -#define PyParser_SimpleParseFile(FP, S, B) \ - PyParser_SimpleParseFileFlags(FP, S, B, 0) -PyAPI_FUNC(struct _node *) PyParser_SimpleParseStringFlags(const char *, int, - int); -PyAPI_FUNC(struct _node *) PyParser_SimpleParseFileFlags(FILE *, const char *, - int, int); - -PyAPI_FUNC(PyObject *) PyRun_StringFlags(const char *, int, PyObject *, - PyObject *, PyCompilerFlags *); - -PyAPI_FUNC(PyObject *) PyRun_FileExFlags(FILE *, const char *, int, - PyObject *, PyObject *, int, - PyCompilerFlags *); - -#define Py_CompileString(str, p, s) Py_CompileStringFlags(str, p, s, NULL) -PyAPI_FUNC(PyObject *) Py_CompileStringFlags(const char *, const char *, int, - PyCompilerFlags *); -PyAPI_FUNC(struct symtable *) Py_SymtableString(const char *, const char *, int); - -PyAPI_FUNC(void) PyErr_Print(void); -PyAPI_FUNC(void) PyErr_PrintEx(int); -PyAPI_FUNC(void) PyErr_Display(PyObject *, PyObject *, PyObject *); - -PyAPI_FUNC(int) Py_AtExit(void (*func)(void)); - -PyAPI_FUNC(void) Py_Exit(int); - -PyAPI_FUNC(int) Py_FdIsInteractive(FILE *, const char *); - -/* Bootstrap */ -PyAPI_FUNC(int) Py_Main(int argc, char **argv); - -/* Use macros for a bunch of old variants */ -#define PyRun_String(str, s, g, l) PyRun_StringFlags(str, s, g, l, NULL) -#define PyRun_AnyFile(fp, name) PyRun_AnyFileExFlags(fp, name, 0, NULL) -#define PyRun_AnyFileEx(fp, name, closeit) \ - PyRun_AnyFileExFlags(fp, name, closeit, NULL) -#define PyRun_AnyFileFlags(fp, name, flags) \ - PyRun_AnyFileExFlags(fp, name, 0, flags) -#define PyRun_SimpleString(s) PyRun_SimpleStringFlags(s, NULL) -#define PyRun_SimpleFile(f, p) PyRun_SimpleFileExFlags(f, p, 0, NULL) -#define PyRun_SimpleFileEx(f, p, c) PyRun_SimpleFileExFlags(f, p, c, NULL) -#define PyRun_InteractiveOne(f, p) PyRun_InteractiveOneFlags(f, p, NULL) -#define PyRun_InteractiveLoop(f, p) PyRun_InteractiveLoopFlags(f, p, NULL) -#define PyRun_File(fp, p, s, g, l) \ - PyRun_FileExFlags(fp, p, s, g, l, 0, NULL) -#define PyRun_FileEx(fp, p, s, g, l, c) \ - PyRun_FileExFlags(fp, p, s, g, l, c, NULL) -#define PyRun_FileFlags(fp, p, s, g, l, flags) \ - PyRun_FileExFlags(fp, p, s, g, l, 0, flags) - -/* In getpath.c */ -PyAPI_FUNC(char *) Py_GetProgramFullPath(void); -PyAPI_FUNC(char *) Py_GetPrefix(void); -PyAPI_FUNC(char *) Py_GetExecPrefix(void); -PyAPI_FUNC(char *) Py_GetPath(void); - -/* In their own files */ -PyAPI_FUNC(const char *) Py_GetVersion(void); -PyAPI_FUNC(const char *) Py_GetPlatform(void); -PyAPI_FUNC(const char *) Py_GetCopyright(void); -PyAPI_FUNC(const char *) Py_GetCompiler(void); -PyAPI_FUNC(const char *) Py_GetBuildInfo(void); -PyAPI_FUNC(const char *) _Py_svnversion(void); -PyAPI_FUNC(const char *) Py_SubversionRevision(void); -PyAPI_FUNC(const char *) Py_SubversionShortBranch(void); -PyAPI_FUNC(const char *) _Py_hgidentifier(void); -PyAPI_FUNC(const char *) _Py_hgversion(void); - -/* Internal -- various one-time initializations */ -PyAPI_FUNC(PyObject *) _PyBuiltin_Init(void); -PyAPI_FUNC(PyObject *) _PySys_Init(void); -PyAPI_FUNC(void) _PyImport_Init(void); -PyAPI_FUNC(void) _PyExc_Init(void); -PyAPI_FUNC(void) _PyImportHooks_Init(void); -PyAPI_FUNC(int) _PyFrame_Init(void); -PyAPI_FUNC(int) _PyInt_Init(void); -PyAPI_FUNC(int) _PyLong_Init(void); -PyAPI_FUNC(void) _PyFloat_Init(void); -PyAPI_FUNC(int) PyByteArray_Init(void); -PyAPI_FUNC(void) _PyRandom_Init(void); - -/* Various internal finalizers */ -PyAPI_FUNC(void) _PyExc_Fini(void); -PyAPI_FUNC(void) _PyImport_Fini(void); -PyAPI_FUNC(void) PyMethod_Fini(void); -PyAPI_FUNC(void) PyFrame_Fini(void); -PyAPI_FUNC(void) PyCFunction_Fini(void); -PyAPI_FUNC(void) PyDict_Fini(void); -PyAPI_FUNC(void) PyTuple_Fini(void); -PyAPI_FUNC(void) PyList_Fini(void); -PyAPI_FUNC(void) PySet_Fini(void); -PyAPI_FUNC(void) PyString_Fini(void); -PyAPI_FUNC(void) PyInt_Fini(void); -PyAPI_FUNC(void) PyFloat_Fini(void); -PyAPI_FUNC(void) PyOS_FiniInterrupts(void); -PyAPI_FUNC(void) PyByteArray_Fini(void); -PyAPI_FUNC(void) _PyRandom_Fini(void); - -/* Stuff with no proper home (yet) */ -PyAPI_FUNC(char *) PyOS_Readline(FILE *, FILE *, char *); -PyAPI_DATA(int) (*PyOS_InputHook)(void); -PyAPI_DATA(char) *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, char *); -PyAPI_DATA(PyThreadState*) _PyOS_ReadlineTState; - -/* Stack size, in "pointers" (so we get extra safety margins - on 64-bit platforms). On a 32-bit platform, this translates - to an 8k margin. */ -#define PYOS_STACK_MARGIN 2048 - -#if defined(WIN32) && !defined(MS_WIN64) && defined(_MSC_VER) && _MSC_VER >= 1300 -/* Enable stack checking under Microsoft C */ -#define USE_STACKCHECK -#endif - -#ifdef USE_STACKCHECK -/* Check that we aren't overflowing our stack */ -PyAPI_FUNC(int) PyOS_CheckStack(void); -#endif - -/* Signals */ -typedef void (*PyOS_sighandler_t)(int); -PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int); -PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t); - -/* Random */ -PyAPI_FUNC(int) _PyOS_URandom (void *buffer, Py_ssize_t size); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_PYTHONRUN_H */
--- a/test/include/python2.7/pythread.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ - -#ifndef Py_PYTHREAD_H -#define Py_PYTHREAD_H - -typedef void *PyThread_type_lock; -typedef void *PyThread_type_sema; - -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_FUNC(void) PyThread_init_thread(void); -PyAPI_FUNC(long) PyThread_start_new_thread(void (*)(void *), void *); -PyAPI_FUNC(void) PyThread_exit_thread(void); -PyAPI_FUNC(long) PyThread_get_thread_ident(void); - -PyAPI_FUNC(PyThread_type_lock) PyThread_allocate_lock(void); -PyAPI_FUNC(void) PyThread_free_lock(PyThread_type_lock); -PyAPI_FUNC(int) PyThread_acquire_lock(PyThread_type_lock, int); -#define WAIT_LOCK 1 -#define NOWAIT_LOCK 0 -PyAPI_FUNC(void) PyThread_release_lock(PyThread_type_lock); - -PyAPI_FUNC(size_t) PyThread_get_stacksize(void); -PyAPI_FUNC(int) PyThread_set_stacksize(size_t); - -/* Thread Local Storage (TLS) API */ -PyAPI_FUNC(int) PyThread_create_key(void); -PyAPI_FUNC(void) PyThread_delete_key(int); -PyAPI_FUNC(int) PyThread_set_key_value(int, void *); -PyAPI_FUNC(void *) PyThread_get_key_value(int); -PyAPI_FUNC(void) PyThread_delete_key_value(int key); - -/* Cleanup after a fork */ -PyAPI_FUNC(void) PyThread_ReInitTLS(void); - -#ifdef __cplusplus -} -#endif - -#endif /* !Py_PYTHREAD_H */
--- a/test/include/python2.7/rangeobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,28 +0,0 @@ - -/* Range object interface */ - -#ifndef Py_RANGEOBJECT_H -#define Py_RANGEOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -/* This is about the type 'xrange', not the built-in function range(), which - returns regular lists. */ - -/* -A range object represents an integer range. This is an immutable object; -a range cannot change its value after creation. - -Range objects behave like the corresponding tuple objects except that -they are represented by a start, stop, and step datamembers. -*/ - -PyAPI_DATA(PyTypeObject) PyRange_Type; - -#define PyRange_Check(op) (Py_TYPE(op) == &PyRange_Type) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_RANGEOBJECT_H */
--- a/test/include/python2.7/setobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,99 +0,0 @@ -/* Set object interface */ - -#ifndef Py_SETOBJECT_H -#define Py_SETOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* -There are three kinds of slots in the table: - -1. Unused: key == NULL -2. Active: key != NULL and key != dummy -3. Dummy: key == dummy - -Note: .pop() abuses the hash field of an Unused or Dummy slot to -hold a search finger. The hash field of Unused or Dummy slots has -no meaning otherwise. -*/ - -#define PySet_MINSIZE 8 - -typedef struct { - long hash; /* cached hash code for the entry key */ - PyObject *key; -} setentry; - - -/* -This data structure is shared by set and frozenset objects. -*/ - -typedef struct _setobject PySetObject; -struct _setobject { - PyObject_HEAD - - Py_ssize_t fill; /* # Active + # Dummy */ - Py_ssize_t used; /* # Active */ - - /* The table contains mask + 1 slots, and that's a power of 2. - * We store the mask instead of the size because the mask is more - * frequently needed. - */ - Py_ssize_t mask; - - /* table points to smalltable for small tables, else to - * additional malloc'ed memory. table is never NULL! This rule - * saves repeated runtime null-tests. - */ - setentry *table; - setentry *(*lookup)(PySetObject *so, PyObject *key, long hash); - setentry smalltable[PySet_MINSIZE]; - - long hash; /* only used by frozenset objects */ - PyObject *weakreflist; /* List of weak references */ -}; - -PyAPI_DATA(PyTypeObject) PySet_Type; -PyAPI_DATA(PyTypeObject) PyFrozenSet_Type; - -/* Invariants for frozensets: - * data is immutable. - * hash is the hash of the frozenset or -1 if not computed yet. - * Invariants for sets: - * hash is -1 - */ - -#define PyFrozenSet_CheckExact(ob) (Py_TYPE(ob) == &PyFrozenSet_Type) -#define PyAnySet_CheckExact(ob) \ - (Py_TYPE(ob) == &PySet_Type || Py_TYPE(ob) == &PyFrozenSet_Type) -#define PyAnySet_Check(ob) \ - (Py_TYPE(ob) == &PySet_Type || Py_TYPE(ob) == &PyFrozenSet_Type || \ - PyType_IsSubtype(Py_TYPE(ob), &PySet_Type) || \ - PyType_IsSubtype(Py_TYPE(ob), &PyFrozenSet_Type)) -#define PySet_Check(ob) \ - (Py_TYPE(ob) == &PySet_Type || \ - PyType_IsSubtype(Py_TYPE(ob), &PySet_Type)) -#define PyFrozenSet_Check(ob) \ - (Py_TYPE(ob) == &PyFrozenSet_Type || \ - PyType_IsSubtype(Py_TYPE(ob), &PyFrozenSet_Type)) - -PyAPI_FUNC(PyObject *) PySet_New(PyObject *); -PyAPI_FUNC(PyObject *) PyFrozenSet_New(PyObject *); -PyAPI_FUNC(Py_ssize_t) PySet_Size(PyObject *anyset); -#define PySet_GET_SIZE(so) (((PySetObject *)(so))->used) -PyAPI_FUNC(int) PySet_Clear(PyObject *set); -PyAPI_FUNC(int) PySet_Contains(PyObject *anyset, PyObject *key); -PyAPI_FUNC(int) PySet_Discard(PyObject *set, PyObject *key); -PyAPI_FUNC(int) PySet_Add(PyObject *set, PyObject *key); -PyAPI_FUNC(int) _PySet_Next(PyObject *set, Py_ssize_t *pos, PyObject **key); -PyAPI_FUNC(int) _PySet_NextEntry(PyObject *set, Py_ssize_t *pos, PyObject **key, long *hash); -PyAPI_FUNC(PyObject *) PySet_Pop(PyObject *set); -PyAPI_FUNC(int) _PySet_Update(PyObject *set, PyObject *iterable); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_SETOBJECT_H */
--- a/test/include/python2.7/sliceobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,44 +0,0 @@ -#ifndef Py_SLICEOBJECT_H -#define Py_SLICEOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -/* The unique ellipsis object "..." */ - -PyAPI_DATA(PyObject) _Py_EllipsisObject; /* Don't use this directly */ - -#define Py_Ellipsis (&_Py_EllipsisObject) - -/* Slice object interface */ - -/* - -A slice object containing start, stop, and step data members (the -names are from range). After much talk with Guido, it was decided to -let these be any arbitrary python type. Py_None stands for omitted values. -*/ - -typedef struct { - PyObject_HEAD - PyObject *start, *stop, *step; /* not NULL */ -} PySliceObject; - -PyAPI_DATA(PyTypeObject) PySlice_Type; -PyAPI_DATA(PyTypeObject) PyEllipsis_Type; - -#define PySlice_Check(op) (Py_TYPE(op) == &PySlice_Type) - -PyAPI_FUNC(PyObject *) PySlice_New(PyObject* start, PyObject* stop, - PyObject* step); -PyAPI_FUNC(PyObject *) _PySlice_FromIndices(Py_ssize_t start, Py_ssize_t stop); -PyAPI_FUNC(int) PySlice_GetIndices(PySliceObject *r, Py_ssize_t length, - Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step); -PyAPI_FUNC(int) PySlice_GetIndicesEx(PySliceObject *r, Py_ssize_t length, - Py_ssize_t *start, Py_ssize_t *stop, - Py_ssize_t *step, Py_ssize_t *slicelength); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_SLICEOBJECT_H */
--- a/test/include/python2.7/stringobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,210 +0,0 @@ - -/* String (str/bytes) object interface */ - -#ifndef Py_STRINGOBJECT_H -#define Py_STRINGOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -#include <stdarg.h> - -/* -Type PyStringObject represents a character string. An extra zero byte is -reserved at the end to ensure it is zero-terminated, but a size is -present so strings with null bytes in them can be represented. This -is an immutable object type. - -There are functions to create new string objects, to test -an object for string-ness, and to get the -string value. The latter function returns a null pointer -if the object is not of the proper type. -There is a variant that takes an explicit size as well as a -variant that assumes a zero-terminated string. Note that none of the -functions should be applied to nil objects. -*/ - -/* Caching the hash (ob_shash) saves recalculation of a string's hash value. - Interning strings (ob_sstate) tries to ensure that only one string - object with a given value exists, so equality tests can be one pointer - comparison. This is generally restricted to strings that "look like" - Python identifiers, although the intern() builtin can be used to force - interning of any string. - Together, these sped the interpreter by up to 20%. */ - -typedef struct { - PyObject_VAR_HEAD - long ob_shash; - int ob_sstate; - char ob_sval[1]; - - /* Invariants: - * ob_sval contains space for 'ob_size+1' elements. - * ob_sval[ob_size] == 0. - * ob_shash is the hash of the string or -1 if not computed yet. - * ob_sstate != 0 iff the string object is in stringobject.c's - * 'interned' dictionary; in this case the two references - * from 'interned' to this object are *not counted* in ob_refcnt. - */ -} PyStringObject; - -#define SSTATE_NOT_INTERNED 0 -#define SSTATE_INTERNED_MORTAL 1 -#define SSTATE_INTERNED_IMMORTAL 2 - -PyAPI_DATA(PyTypeObject) PyBaseString_Type; -PyAPI_DATA(PyTypeObject) PyString_Type; - -#define PyString_Check(op) \ - PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_STRING_SUBCLASS) -#define PyString_CheckExact(op) (Py_TYPE(op) == &PyString_Type) - -PyAPI_FUNC(PyObject *) PyString_FromStringAndSize(const char *, Py_ssize_t); -PyAPI_FUNC(PyObject *) PyString_FromString(const char *); -PyAPI_FUNC(PyObject *) PyString_FromFormatV(const char*, va_list) - Py_GCC_ATTRIBUTE((format(printf, 1, 0))); -PyAPI_FUNC(PyObject *) PyString_FromFormat(const char*, ...) - Py_GCC_ATTRIBUTE((format(printf, 1, 2))); -PyAPI_FUNC(Py_ssize_t) PyString_Size(PyObject *); -PyAPI_FUNC(char *) PyString_AsString(PyObject *); -PyAPI_FUNC(PyObject *) PyString_Repr(PyObject *, int); -PyAPI_FUNC(void) PyString_Concat(PyObject **, PyObject *); -PyAPI_FUNC(void) PyString_ConcatAndDel(PyObject **, PyObject *); -PyAPI_FUNC(int) _PyString_Resize(PyObject **, Py_ssize_t); -PyAPI_FUNC(int) _PyString_Eq(PyObject *, PyObject*); -PyAPI_FUNC(PyObject *) PyString_Format(PyObject *, PyObject *); -PyAPI_FUNC(PyObject *) _PyString_FormatLong(PyObject*, int, int, - int, char**, int*); -PyAPI_FUNC(PyObject *) PyString_DecodeEscape(const char *, Py_ssize_t, - const char *, Py_ssize_t, - const char *); - -PyAPI_FUNC(void) PyString_InternInPlace(PyObject **); -PyAPI_FUNC(void) PyString_InternImmortal(PyObject **); -PyAPI_FUNC(PyObject *) PyString_InternFromString(const char *); -PyAPI_FUNC(void) _Py_ReleaseInternedStrings(void); - -/* Use only if you know it's a string */ -#define PyString_CHECK_INTERNED(op) (((PyStringObject *)(op))->ob_sstate) - -/* Macro, trading safety for speed */ -#define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval) -#define PyString_GET_SIZE(op) Py_SIZE(op) - -/* _PyString_Join(sep, x) is like sep.join(x). sep must be PyStringObject*, - x must be an iterable object. */ -PyAPI_FUNC(PyObject *) _PyString_Join(PyObject *sep, PyObject *x); - -/* --- Generic Codecs ----------------------------------------------------- */ - -/* Create an object by decoding the encoded string s of the - given size. */ - -PyAPI_FUNC(PyObject*) PyString_Decode( - const char *s, /* encoded string */ - Py_ssize_t size, /* size of buffer */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Encodes a char buffer of the given size and returns a - Python object. */ - -PyAPI_FUNC(PyObject*) PyString_Encode( - const char *s, /* string char buffer */ - Py_ssize_t size, /* number of chars to encode */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Encodes a string object and returns the result as Python - object. */ - -PyAPI_FUNC(PyObject*) PyString_AsEncodedObject( - PyObject *str, /* string object */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Encodes a string object and returns the result as Python string - object. - - If the codec returns a Unicode object, the object is converted - back to a string using the default encoding. - - DEPRECATED - use PyString_AsEncodedObject() instead. */ - -PyAPI_FUNC(PyObject*) PyString_AsEncodedString( - PyObject *str, /* string object */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Decodes a string object and returns the result as Python - object. */ - -PyAPI_FUNC(PyObject*) PyString_AsDecodedObject( - PyObject *str, /* string object */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Decodes a string object and returns the result as Python string - object. - - If the codec returns a Unicode object, the object is converted - back to a string using the default encoding. - - DEPRECATED - use PyString_AsDecodedObject() instead. */ - -PyAPI_FUNC(PyObject*) PyString_AsDecodedString( - PyObject *str, /* string object */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Provides access to the internal data buffer and size of a string - object or the default encoded version of a Unicode object. Passing - NULL as *len parameter will force the string buffer to be - 0-terminated (passing a string with embedded NULL characters will - cause an exception). */ - -PyAPI_FUNC(int) PyString_AsStringAndSize( - register PyObject *obj, /* string or Unicode object */ - register char **s, /* pointer to buffer variable */ - register Py_ssize_t *len /* pointer to length variable or NULL - (only possible for 0-terminated - strings) */ - ); - - -/* Using the current locale, insert the thousands grouping - into the string pointed to by buffer. For the argument descriptions, - see Objects/stringlib/localeutil.h */ -PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGroupingLocale(char *buffer, - Py_ssize_t n_buffer, - char *digits, - Py_ssize_t n_digits, - Py_ssize_t min_width); - -/* Using explicit passed-in values, insert the thousands grouping - into the string pointed to by buffer. For the argument descriptions, - see Objects/stringlib/localeutil.h */ -PyAPI_FUNC(Py_ssize_t) _PyString_InsertThousandsGrouping(char *buffer, - Py_ssize_t n_buffer, - char *digits, - Py_ssize_t n_digits, - Py_ssize_t min_width, - const char *grouping, - const char *thousands_sep); - -/* Format the object based on the format_spec, as defined in PEP 3101 - (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyBytes_FormatAdvanced(PyObject *obj, - char *format_spec, - Py_ssize_t format_spec_len); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_STRINGOBJECT_H */
--- a/test/include/python2.7/structmember.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,99 +0,0 @@ -#ifndef Py_STRUCTMEMBER_H -#define Py_STRUCTMEMBER_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* Interface to map C struct members to Python object attributes */ - -#include <stddef.h> /* For offsetof */ - -/* The offsetof() macro calculates the offset of a structure member - in its structure. Unfortunately this cannot be written down - portably, hence it is provided by a Standard C header file. - For pre-Standard C compilers, here is a version that usually works - (but watch out!): */ - -#ifndef offsetof -#define offsetof(type, member) ( (int) & ((type*)0) -> member ) -#endif - -/* An array of memberlist structures defines the name, type and offset - of selected members of a C structure. These can be read by - PyMember_Get() and set by PyMember_Set() (except if their READONLY flag - is set). The array must be terminated with an entry whose name - pointer is NULL. */ - -struct memberlist { - /* Obsolete version, for binary backwards compatibility */ - char *name; - int type; - int offset; - int flags; -}; - -typedef struct PyMemberDef { - /* Current version, use this */ - char *name; - int type; - Py_ssize_t offset; - int flags; - char *doc; -} PyMemberDef; - -/* Types */ -#define T_SHORT 0 -#define T_INT 1 -#define T_LONG 2 -#define T_FLOAT 3 -#define T_DOUBLE 4 -#define T_STRING 5 -#define T_OBJECT 6 -/* XXX the ordering here is weird for binary compatibility */ -#define T_CHAR 7 /* 1-character string */ -#define T_BYTE 8 /* 8-bit signed int */ -/* unsigned variants: */ -#define T_UBYTE 9 -#define T_USHORT 10 -#define T_UINT 11 -#define T_ULONG 12 - -/* Added by Jack: strings contained in the structure */ -#define T_STRING_INPLACE 13 - -/* Added by Lillo: bools contained in the structure (assumed char) */ -#define T_BOOL 14 - -#define T_OBJECT_EX 16 /* Like T_OBJECT, but raises AttributeError - when the value is NULL, instead of - converting to None. */ -#ifdef HAVE_LONG_LONG -#define T_LONGLONG 17 -#define T_ULONGLONG 18 -#endif /* HAVE_LONG_LONG */ - -#define T_PYSSIZET 19 /* Py_ssize_t */ - - -/* Flags */ -#define READONLY 1 -#define RO READONLY /* Shorthand */ -#define READ_RESTRICTED 2 -#define PY_WRITE_RESTRICTED 4 -#define RESTRICTED (READ_RESTRICTED | PY_WRITE_RESTRICTED) - - -/* Obsolete API, for binary backwards compatibility */ -PyAPI_FUNC(PyObject *) PyMember_Get(const char *, struct memberlist *, const char *); -PyAPI_FUNC(int) PyMember_Set(char *, struct memberlist *, const char *, PyObject *); - -/* Current API, use this */ -PyAPI_FUNC(PyObject *) PyMember_GetOne(const char *, struct PyMemberDef *); -PyAPI_FUNC(int) PyMember_SetOne(char *, struct PyMemberDef *, PyObject *); - - -#ifdef __cplusplus -} -#endif -#endif /* !Py_STRUCTMEMBER_H */
--- a/test/include/python2.7/structseq.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,41 +0,0 @@ - -/* Tuple object interface */ - -#ifndef Py_STRUCTSEQ_H -#define Py_STRUCTSEQ_H -#ifdef __cplusplus -extern "C" { -#endif - -typedef struct PyStructSequence_Field { - char *name; - char *doc; -} PyStructSequence_Field; - -typedef struct PyStructSequence_Desc { - char *name; - char *doc; - struct PyStructSequence_Field *fields; - int n_in_sequence; -} PyStructSequence_Desc; - -extern char* PyStructSequence_UnnamedField; - -PyAPI_FUNC(void) PyStructSequence_InitType(PyTypeObject *type, - PyStructSequence_Desc *desc); - -PyAPI_FUNC(PyObject *) PyStructSequence_New(PyTypeObject* type); - -typedef struct { - PyObject_VAR_HEAD - PyObject *ob_item[1]; -} PyStructSequence; - -/* Macro, *only* to be used to fill in brand new objects */ -#define PyStructSequence_SET_ITEM(op, i, v) \ - (((PyStructSequence *)(op))->ob_item[i] = v) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_STRUCTSEQ_H */
--- a/test/include/python2.7/symtable.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,98 +0,0 @@ -#ifndef Py_SYMTABLE_H -#define Py_SYMTABLE_H - -#ifdef __cplusplus -extern "C" { -#endif - -typedef enum _block_type { FunctionBlock, ClassBlock, ModuleBlock } - _Py_block_ty; - -struct _symtable_entry; - -struct symtable { - const char *st_filename; /* name of file being compiled */ - struct _symtable_entry *st_cur; /* current symbol table entry */ - struct _symtable_entry *st_top; /* module entry */ - PyObject *st_symbols; /* dictionary of symbol table entries */ - PyObject *st_stack; /* stack of namespace info */ - PyObject *st_global; /* borrowed ref to MODULE in st_symbols */ - int st_nblocks; /* number of blocks */ - PyObject *st_private; /* name of current class or NULL */ - PyFutureFeatures *st_future; /* module's future features */ -}; - -typedef struct _symtable_entry { - PyObject_HEAD - PyObject *ste_id; /* int: key in st_symbols */ - PyObject *ste_symbols; /* dict: name to flags */ - PyObject *ste_name; /* string: name of block */ - PyObject *ste_varnames; /* list of variable names */ - PyObject *ste_children; /* list of child ids */ - _Py_block_ty ste_type; /* module, class, or function */ - int ste_unoptimized; /* false if namespace is optimized */ - int ste_nested; /* true if block is nested */ - unsigned ste_free : 1; /* true if block has free variables */ - unsigned ste_child_free : 1; /* true if a child block has free vars, - including free refs to globals */ - unsigned ste_generator : 1; /* true if namespace is a generator */ - unsigned ste_varargs : 1; /* true if block has varargs */ - unsigned ste_varkeywords : 1; /* true if block has varkeywords */ - unsigned ste_returns_value : 1; /* true if namespace uses return with - an argument */ - int ste_lineno; /* first line of block */ - int ste_opt_lineno; /* lineno of last exec or import * */ - int ste_tmpname; /* counter for listcomp temp vars */ - struct symtable *ste_table; -} PySTEntryObject; - -PyAPI_DATA(PyTypeObject) PySTEntry_Type; - -#define PySTEntry_Check(op) (Py_TYPE(op) == &PySTEntry_Type) - -PyAPI_FUNC(int) PyST_GetScope(PySTEntryObject *, PyObject *); - -PyAPI_FUNC(struct symtable *) PySymtable_Build(mod_ty, const char *, - PyFutureFeatures *); -PyAPI_FUNC(PySTEntryObject *) PySymtable_Lookup(struct symtable *, void *); - -PyAPI_FUNC(void) PySymtable_Free(struct symtable *); - -/* Flags for def-use information */ - -#define DEF_GLOBAL 1 /* global stmt */ -#define DEF_LOCAL 2 /* assignment in code block */ -#define DEF_PARAM 2<<1 /* formal parameter */ -#define USE 2<<2 /* name is used */ -#define DEF_FREE 2<<3 /* name used but not defined in nested block */ -#define DEF_FREE_CLASS 2<<4 /* free variable from class's method */ -#define DEF_IMPORT 2<<5 /* assignment occurred via import */ - -#define DEF_BOUND (DEF_LOCAL | DEF_PARAM | DEF_IMPORT) - -/* GLOBAL_EXPLICIT and GLOBAL_IMPLICIT are used internally by the symbol - table. GLOBAL is returned from PyST_GetScope() for either of them. - It is stored in ste_symbols at bits 12-14. -*/ -#define SCOPE_OFF 11 -#define SCOPE_MASK 7 - -#define LOCAL 1 -#define GLOBAL_EXPLICIT 2 -#define GLOBAL_IMPLICIT 3 -#define FREE 4 -#define CELL 5 - -/* The following three names are used for the ste_unoptimized bit field */ -#define OPT_IMPORT_STAR 1 -#define OPT_EXEC 2 -#define OPT_BARE_EXEC 4 -#define OPT_TOPLEVEL 8 /* top-level names, including eval and exec */ - -#define GENERATOR 1 -#define GENERATOR_EXPRESSION 2 - -#ifdef __cplusplus -} -#endif -#endif /* !Py_SYMTABLE_H */
--- a/test/include/python2.7/sysmodule.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ - -/* System module interface */ - -#ifndef Py_SYSMODULE_H -#define Py_SYSMODULE_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_FUNC(PyObject *) PySys_GetObject(char *); -PyAPI_FUNC(int) PySys_SetObject(char *, PyObject *); -PyAPI_FUNC(FILE *) PySys_GetFile(char *, FILE *); -PyAPI_FUNC(void) PySys_SetArgv(int, char **); -PyAPI_FUNC(void) PySys_SetArgvEx(int, char **, int); -PyAPI_FUNC(void) PySys_SetPath(char *); - -PyAPI_FUNC(void) PySys_WriteStdout(const char *format, ...) - Py_GCC_ATTRIBUTE((format(printf, 1, 2))); -PyAPI_FUNC(void) PySys_WriteStderr(const char *format, ...) - Py_GCC_ATTRIBUTE((format(printf, 1, 2))); - -PyAPI_FUNC(void) PySys_ResetWarnOptions(void); -PyAPI_FUNC(void) PySys_AddWarnOption(char *); -PyAPI_FUNC(int) PySys_HasWarnOptions(void); - -PyAPI_FUNC(size_t) _PySys_GetSizeOf(PyObject *); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_SYSMODULE_H */
--- a/test/include/python2.7/timefuncs.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -/* timefuncs.h - */ - -/* Utility function related to timemodule.c. */ - -#ifndef TIMEFUNCS_H -#define TIMEFUNCS_H -#ifdef __cplusplus -extern "C" { -#endif - - -/* Cast double x to time_t, but raise ValueError if x is too large - * to fit in a time_t. ValueError is set on return iff the return - * value is (time_t)-1 and PyErr_Occurred(). - */ -PyAPI_FUNC(time_t) _PyTime_DoubleToTimet(double x); - -/* Get the current time since the epoch in seconds */ -PyAPI_FUNC(double) _PyTime_FloatTime(void); - - -#ifdef __cplusplus -} -#endif -#endif /* TIMEFUNCS_H */
--- a/test/include/python2.7/token.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,85 +0,0 @@ - -/* Token types */ - -#ifndef Py_TOKEN_H -#define Py_TOKEN_H -#ifdef __cplusplus -extern "C" { -#endif - -#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */ - -#define ENDMARKER 0 -#define NAME 1 -#define NUMBER 2 -#define STRING 3 -#define NEWLINE 4 -#define INDENT 5 -#define DEDENT 6 -#define LPAR 7 -#define RPAR 8 -#define LSQB 9 -#define RSQB 10 -#define COLON 11 -#define COMMA 12 -#define SEMI 13 -#define PLUS 14 -#define MINUS 15 -#define STAR 16 -#define SLASH 17 -#define VBAR 18 -#define AMPER 19 -#define LESS 20 -#define GREATER 21 -#define EQUAL 22 -#define DOT 23 -#define PERCENT 24 -#define BACKQUOTE 25 -#define LBRACE 26 -#define RBRACE 27 -#define EQEQUAL 28 -#define NOTEQUAL 29 -#define LESSEQUAL 30 -#define GREATEREQUAL 31 -#define TILDE 32 -#define CIRCUMFLEX 33 -#define LEFTSHIFT 34 -#define RIGHTSHIFT 35 -#define DOUBLESTAR 36 -#define PLUSEQUAL 37 -#define MINEQUAL 38 -#define STAREQUAL 39 -#define SLASHEQUAL 40 -#define PERCENTEQUAL 41 -#define AMPEREQUAL 42 -#define VBAREQUAL 43 -#define CIRCUMFLEXEQUAL 44 -#define LEFTSHIFTEQUAL 45 -#define RIGHTSHIFTEQUAL 46 -#define DOUBLESTAREQUAL 47 -#define DOUBLESLASH 48 -#define DOUBLESLASHEQUAL 49 -#define AT 50 -/* Don't forget to update the table _PyParser_TokenNames in tokenizer.c! */ -#define OP 51 -#define ERRORTOKEN 52 -#define N_TOKENS 53 - -/* Special definitions for cooperation with parser */ - -#define NT_OFFSET 256 - -#define ISTERMINAL(x) ((x) < NT_OFFSET) -#define ISNONTERMINAL(x) ((x) >= NT_OFFSET) -#define ISEOF(x) ((x) == ENDMARKER) - - -PyAPI_DATA(char *) _PyParser_TokenNames[]; /* Token names */ -PyAPI_FUNC(int) PyToken_OneChar(int); -PyAPI_FUNC(int) PyToken_TwoChars(int, int); -PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_TOKEN_H */
--- a/test/include/python2.7/traceback.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ - -#ifndef Py_TRACEBACK_H -#define Py_TRACEBACK_H -#ifdef __cplusplus -extern "C" { -#endif - -struct _frame; - -/* Traceback interface */ - -typedef struct _traceback { - PyObject_HEAD - struct _traceback *tb_next; - struct _frame *tb_frame; - int tb_lasti; - int tb_lineno; -} PyTracebackObject; - -PyAPI_FUNC(int) PyTraceBack_Here(struct _frame *); -PyAPI_FUNC(int) PyTraceBack_Print(PyObject *, PyObject *); -PyAPI_FUNC(int) _Py_DisplaySourceLine(PyObject *, const char *, int, int); - -/* Reveal traceback type so we can typecheck traceback objects */ -PyAPI_DATA(PyTypeObject) PyTraceBack_Type; -#define PyTraceBack_Check(v) (Py_TYPE(v) == &PyTraceBack_Type) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_TRACEBACK_H */
--- a/test/include/python2.7/tupleobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,61 +0,0 @@ - -/* Tuple object interface */ - -#ifndef Py_TUPLEOBJECT_H -#define Py_TUPLEOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - -/* -Another generally useful object type is a tuple of object pointers. -For Python, this is an immutable type. C code can change the tuple items -(but not their number), and even use tuples are general-purpose arrays of -object references, but in general only brand new tuples should be mutated, -not ones that might already have been exposed to Python code. - -*** WARNING *** PyTuple_SetItem does not increment the new item's reference -count, but does decrement the reference count of the item it replaces, -if not nil. It does *decrement* the reference count if it is *not* -inserted in the tuple. Similarly, PyTuple_GetItem does not increment the -returned item's reference count. -*/ - -typedef struct { - PyObject_VAR_HEAD - PyObject *ob_item[1]; - - /* ob_item contains space for 'ob_size' elements. - * Items must normally not be NULL, except during construction when - * the tuple is not yet visible outside the function that builds it. - */ -} PyTupleObject; - -PyAPI_DATA(PyTypeObject) PyTuple_Type; - -#define PyTuple_Check(op) \ - PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_TUPLE_SUBCLASS) -#define PyTuple_CheckExact(op) (Py_TYPE(op) == &PyTuple_Type) - -PyAPI_FUNC(PyObject *) PyTuple_New(Py_ssize_t size); -PyAPI_FUNC(Py_ssize_t) PyTuple_Size(PyObject *); -PyAPI_FUNC(PyObject *) PyTuple_GetItem(PyObject *, Py_ssize_t); -PyAPI_FUNC(int) PyTuple_SetItem(PyObject *, Py_ssize_t, PyObject *); -PyAPI_FUNC(PyObject *) PyTuple_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t); -PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t); -PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...); -PyAPI_FUNC(void) _PyTuple_MaybeUntrack(PyObject *); - -/* Macro, trading safety for speed */ -#define PyTuple_GET_ITEM(op, i) (((PyTupleObject *)(op))->ob_item[i]) -#define PyTuple_GET_SIZE(op) Py_SIZE(op) - -/* Macro, *only* to be used to fill in brand new tuples */ -#define PyTuple_SET_ITEM(op, i, v) (((PyTupleObject *)(op))->ob_item[i] = v) - -PyAPI_FUNC(int) PyTuple_ClearFreeList(void); - -#ifdef __cplusplus -} -#endif -#endif /* !Py_TUPLEOBJECT_H */
--- a/test/include/python2.7/ucnhash.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -/* Unicode name database interface */ - -#ifndef Py_UCNHASH_H -#define Py_UCNHASH_H -#ifdef __cplusplus -extern "C" { -#endif - -/* revised ucnhash CAPI interface (exported through a "wrapper") */ - -#define PyUnicodeData_CAPSULE_NAME "unicodedata.ucnhash_CAPI" - -typedef struct { - - /* Size of this struct */ - int size; - - /* Get name for a given character code. Returns non-zero if - success, zero if not. Does not set Python exceptions. - If self is NULL, data come from the default version of the database. - If it is not NULL, it should be a unicodedata.ucd_X_Y_Z object */ - int (*getname)(PyObject *self, Py_UCS4 code, char* buffer, int buflen); - - /* Get character code for a given name. Same error handling - as for getname. */ - int (*getcode)(PyObject *self, const char* name, int namelen, Py_UCS4* code); - -} _PyUnicode_Name_CAPI; - -#ifdef __cplusplus -} -#endif -#endif /* !Py_UCNHASH_H */
--- a/test/include/python2.7/unicodeobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1413 +0,0 @@ -#ifndef Py_UNICODEOBJECT_H -#define Py_UNICODEOBJECT_H - -#include <stdarg.h> - -/* - -Unicode implementation based on original code by Fredrik Lundh, -modified by Marc-Andre Lemburg (mal@lemburg.com) according to the -Unicode Integration Proposal (see file Misc/unicode.txt). - -Copyright (c) Corporation for National Research Initiatives. - - - Original header: - -------------------------------------------------------------------- - - * Yet another Unicode string type for Python. This type supports the - * 16-bit Basic Multilingual Plane (BMP) only. - * - * Written by Fredrik Lundh, January 1999. - * - * Copyright (c) 1999 by Secret Labs AB. - * Copyright (c) 1999 by Fredrik Lundh. - * - * fredrik@pythonware.com - * http://www.pythonware.com - * - * -------------------------------------------------------------------- - * This Unicode String Type is - * - * Copyright (c) 1999 by Secret Labs AB - * Copyright (c) 1999 by Fredrik Lundh - * - * By obtaining, using, and/or copying this software and/or its - * associated documentation, you agree that you have read, understood, - * and will comply with the following terms and conditions: - * - * Permission to use, copy, modify, and distribute this software and its - * associated documentation for any purpose and without fee is hereby - * granted, provided that the above copyright notice appears in all - * copies, and that both that copyright notice and this permission notice - * appear in supporting documentation, and that the name of Secret Labs - * AB or the author not be used in advertising or publicity pertaining to - * distribution of the software without specific, written prior - * permission. - * - * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO - * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND - * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT - * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - * -------------------------------------------------------------------- */ - -#include <ctype.h> - -/* === Internal API ======================================================= */ - -/* --- Internal Unicode Format -------------------------------------------- */ - -#ifndef Py_USING_UNICODE - -#define PyUnicode_Check(op) 0 -#define PyUnicode_CheckExact(op) 0 - -#else - -/* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is - properly set, but the default rules below doesn't set it. I'll - sort this out some other day -- fredrik@pythonware.com */ - -#ifndef Py_UNICODE_SIZE -#error Must define Py_UNICODE_SIZE -#endif - -/* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode - strings are stored as UCS-2 (with limited support for UTF-16) */ - -#if Py_UNICODE_SIZE >= 4 -#define Py_UNICODE_WIDE -#endif - -/* Set these flags if the platform has "wchar.h", "wctype.h" and the - wchar_t type is a 16-bit unsigned type */ -/* #define HAVE_WCHAR_H */ -/* #define HAVE_USABLE_WCHAR_T */ - -/* Defaults for various platforms */ -#ifndef PY_UNICODE_TYPE - -/* Windows has a usable wchar_t type (unless we're using UCS-4) */ -# if defined(MS_WIN32) && Py_UNICODE_SIZE == 2 -# define HAVE_USABLE_WCHAR_T -# define PY_UNICODE_TYPE wchar_t -# endif - -# if defined(Py_UNICODE_WIDE) -# define PY_UNICODE_TYPE Py_UCS4 -# endif - -#endif - -/* If the compiler provides a wchar_t type we try to support it - through the interface functions PyUnicode_FromWideChar() and - PyUnicode_AsWideChar(). */ - -#ifdef HAVE_USABLE_WCHAR_T -# ifndef HAVE_WCHAR_H -# define HAVE_WCHAR_H -# endif -#endif - -#ifdef HAVE_WCHAR_H -/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */ -# ifdef _HAVE_BSDI -# include <time.h> -# endif -# include <wchar.h> -#endif - -/* - * Use this typedef when you need to represent a UTF-16 surrogate pair - * as single unsigned integer. - */ -#if SIZEOF_INT >= 4 -typedef unsigned int Py_UCS4; -#elif SIZEOF_LONG >= 4 -typedef unsigned long Py_UCS4; -#endif - -/* Py_UNICODE is the native Unicode storage format (code unit) used by - Python and represents a single Unicode element in the Unicode - type. */ - -typedef PY_UNICODE_TYPE Py_UNICODE; - -/* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */ - -/* Unicode API names are mangled to assure that UCS-2 and UCS-4 builds - produce different external names and thus cause import errors in - case Python interpreters and extensions with mixed compiled in - Unicode width assumptions are combined. */ - -#ifndef Py_UNICODE_WIDE - -# define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString -# define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString -# define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject -# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString -# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String -# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString -# define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String -# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String -# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String -# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode -# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString -# define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar -# define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist -# define PyUnicode_Compare PyUnicodeUCS2_Compare -# define PyUnicode_Concat PyUnicodeUCS2_Concat -# define PyUnicode_Contains PyUnicodeUCS2_Contains -# define PyUnicode_Count PyUnicodeUCS2_Count -# define PyUnicode_Decode PyUnicodeUCS2_Decode -# define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII -# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap -# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1 -# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape -# define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32 -# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful -# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16 -# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful -# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8 -# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful -# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape -# define PyUnicode_Encode PyUnicodeUCS2_Encode -# define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII -# define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap -# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal -# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1 -# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape -# define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32 -# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16 -# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8 -# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape -# define PyUnicode_Find PyUnicodeUCS2_Find -# define PyUnicode_Format PyUnicodeUCS2_Format -# define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject -# define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat -# define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV -# define PyUnicode_FromObject PyUnicodeUCS2_FromObject -# define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal -# define PyUnicode_FromString PyUnicodeUCS2_FromString -# define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize -# define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode -# define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar -# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding -# define PyUnicode_GetMax PyUnicodeUCS2_GetMax -# define PyUnicode_GetSize PyUnicodeUCS2_GetSize -# define PyUnicode_Join PyUnicodeUCS2_Join -# define PyUnicode_Partition PyUnicodeUCS2_Partition -# define PyUnicode_RPartition PyUnicodeUCS2_RPartition -# define PyUnicode_RSplit PyUnicodeUCS2_RSplit -# define PyUnicode_Replace PyUnicodeUCS2_Replace -# define PyUnicode_Resize PyUnicodeUCS2_Resize -# define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare -# define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding -# define PyUnicode_Split PyUnicodeUCS2_Split -# define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines -# define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch -# define PyUnicode_Translate PyUnicodeUCS2_Translate -# define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap -# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString -# define _PyUnicode_Fini _PyUnicodeUCS2_Fini -# define _PyUnicode_Init _PyUnicodeUCS2_Init -# define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha -# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit -# define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit -# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak -# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase -# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric -# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase -# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase -# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace -# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit -# define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit -# define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase -# define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric -# define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase -# define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase - -#else - -# define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString -# define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString -# define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject -# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString -# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String -# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString -# define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String -# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String -# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String -# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode -# define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString -# define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar -# define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist -# define PyUnicode_Compare PyUnicodeUCS4_Compare -# define PyUnicode_Concat PyUnicodeUCS4_Concat -# define PyUnicode_Contains PyUnicodeUCS4_Contains -# define PyUnicode_Count PyUnicodeUCS4_Count -# define PyUnicode_Decode PyUnicodeUCS4_Decode -# define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII -# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap -# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1 -# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape -# define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32 -# define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful -# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16 -# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful -# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8 -# define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful -# define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape -# define PyUnicode_Encode PyUnicodeUCS4_Encode -# define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII -# define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap -# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal -# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1 -# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape -# define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32 -# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16 -# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8 -# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape -# define PyUnicode_Find PyUnicodeUCS4_Find -# define PyUnicode_Format PyUnicodeUCS4_Format -# define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject -# define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat -# define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV -# define PyUnicode_FromObject PyUnicodeUCS4_FromObject -# define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal -# define PyUnicode_FromString PyUnicodeUCS4_FromString -# define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize -# define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode -# define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar -# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding -# define PyUnicode_GetMax PyUnicodeUCS4_GetMax -# define PyUnicode_GetSize PyUnicodeUCS4_GetSize -# define PyUnicode_Join PyUnicodeUCS4_Join -# define PyUnicode_Partition PyUnicodeUCS4_Partition -# define PyUnicode_RPartition PyUnicodeUCS4_RPartition -# define PyUnicode_RSplit PyUnicodeUCS4_RSplit -# define PyUnicode_Replace PyUnicodeUCS4_Replace -# define PyUnicode_Resize PyUnicodeUCS4_Resize -# define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare -# define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding -# define PyUnicode_Split PyUnicodeUCS4_Split -# define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines -# define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch -# define PyUnicode_Translate PyUnicodeUCS4_Translate -# define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap -# define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString -# define _PyUnicode_Fini _PyUnicodeUCS4_Fini -# define _PyUnicode_Init _PyUnicodeUCS4_Init -# define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha -# define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit -# define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit -# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak -# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase -# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric -# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase -# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase -# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace -# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit -# define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit -# define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase -# define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric -# define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase -# define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase - - -#endif - -/* --- Internal Unicode Operations ---------------------------------------- */ - -/* If you want Python to use the compiler's wctype.h functions instead - of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or - configure Python using --with-wctype-functions. This reduces the - interpreter's code size. */ - -#if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS) - -#include <wctype.h> - -#define Py_UNICODE_ISSPACE(ch) iswspace(ch) - -#define Py_UNICODE_ISLOWER(ch) iswlower(ch) -#define Py_UNICODE_ISUPPER(ch) iswupper(ch) -#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) -#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) - -#define Py_UNICODE_TOLOWER(ch) towlower(ch) -#define Py_UNICODE_TOUPPER(ch) towupper(ch) -#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) - -#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) -#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) -#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) - -#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) -#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) -#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) - -#define Py_UNICODE_ISALPHA(ch) iswalpha(ch) - -#else - -/* Since splitting on whitespace is an important use case, and - whitespace in most situations is solely ASCII whitespace, we - optimize for the common case by using a quick look-up table - _Py_ascii_whitespace (see below) with an inlined check. - - */ -#define Py_UNICODE_ISSPACE(ch) \ - ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) - -#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) -#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) -#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) -#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) - -#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) -#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) -#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) - -#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) -#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) -#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) - -#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) -#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) -#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) - -#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) - -#endif - -#define Py_UNICODE_ISALNUM(ch) \ - (Py_UNICODE_ISALPHA(ch) || \ - Py_UNICODE_ISDECIMAL(ch) || \ - Py_UNICODE_ISDIGIT(ch) || \ - Py_UNICODE_ISNUMERIC(ch)) - -#define Py_UNICODE_COPY(target, source, length) \ - Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE)) - -#define Py_UNICODE_FILL(target, value, length) \ - do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ - for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\ - } while (0) - -/* Check if substring matches at given offset. the offset must be - valid, and the substring must not be empty */ - -#define Py_UNICODE_MATCH(string, offset, substring) \ - ((*((string)->str + (offset)) == *((substring)->str)) && \ - ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \ - !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE))) - -#ifdef __cplusplus -extern "C" { -#endif - -/* --- Unicode Type ------------------------------------------------------- */ - -typedef struct { - PyObject_HEAD - Py_ssize_t length; /* Length of raw Unicode data in buffer */ - Py_UNICODE *str; /* Raw Unicode buffer */ - long hash; /* Hash value; -1 if not set */ - PyObject *defenc; /* (Default) Encoded version as Python - string, or NULL; this is used for - implementing the buffer protocol */ -} PyUnicodeObject; - -PyAPI_DATA(PyTypeObject) PyUnicode_Type; - -#define PyUnicode_Check(op) \ - PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) -#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) - -/* Fast access macros */ -#define PyUnicode_GET_SIZE(op) \ - (((PyUnicodeObject *)(op))->length) -#define PyUnicode_GET_DATA_SIZE(op) \ - (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)) -#define PyUnicode_AS_UNICODE(op) \ - (((PyUnicodeObject *)(op))->str) -#define PyUnicode_AS_DATA(op) \ - ((const char *)((PyUnicodeObject *)(op))->str) - -/* --- Constants ---------------------------------------------------------- */ - -/* This Unicode character will be used as replacement character during - decoding if the errors argument is set to "replace". Note: the - Unicode character U+FFFD is the official REPLACEMENT CHARACTER in - Unicode 3.0. */ - -#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD) - -/* === Public API ========================================================= */ - -/* --- Plain Py_UNICODE --------------------------------------------------- */ - -/* Create a Unicode Object from the Py_UNICODE buffer u of the given - size. - - u may be NULL which causes the contents to be undefined. It is the - user's responsibility to fill in the needed data afterwards. Note - that modifying the Unicode object contents after construction is - only allowed if u was set to NULL. - - The buffer is copied into the new object. */ - -PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( - const Py_UNICODE *u, /* Unicode buffer */ - Py_ssize_t size /* size of buffer */ - ); - -/* Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes */ -PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( - const char *u, /* char buffer */ - Py_ssize_t size /* size of buffer */ - ); - -/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated - Latin-1 encoded bytes */ -PyAPI_FUNC(PyObject*) PyUnicode_FromString( - const char *u /* string */ - ); - -/* Return a read-only pointer to the Unicode object's internal - Py_UNICODE buffer. */ - -PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( - PyObject *unicode /* Unicode object */ - ); - -/* Get the length of the Unicode object. */ - -PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize( - PyObject *unicode /* Unicode object */ - ); - -/* Get the maximum ordinal for a Unicode character. */ -PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void); - -/* Resize an already allocated Unicode object to the new size length. - - *unicode is modified to point to the new (resized) object and 0 - returned on success. - - This API may only be called by the function which also called the - Unicode constructor. The refcount on the object must be 1. Otherwise, - an error is returned. - - Error handling is implemented as follows: an exception is set, -1 - is returned and *unicode left untouched. - -*/ - -PyAPI_FUNC(int) PyUnicode_Resize( - PyObject **unicode, /* Pointer to the Unicode object */ - Py_ssize_t length /* New length */ - ); - -/* Coerce obj to a Unicode object and return a reference with - *incremented* refcount. - - Coercion is done in the following way: - - 1. String and other char buffer compatible objects are decoded - under the assumptions that they contain data using the current - default encoding. Decoding is done in "strict" mode. - - 2. All other objects (including Unicode objects) raise an - exception. - - The API returns NULL in case of an error. The caller is responsible - for decref'ing the returned objects. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( - register PyObject *obj, /* Object */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Coerce obj to a Unicode object and return a reference with - *incremented* refcount. - - Unicode objects are passed back as-is (subclasses are converted to - true Unicode objects), all other objects are delegated to - PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in - using the default encoding as basis for decoding the object. - - The API returns NULL in case of an error. The caller is responsible - for decref'ing the returned objects. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_FromObject( - register PyObject *obj /* Object */ - ); - -PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list); -PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...); - -/* Format the object based on the format_spec, as defined in PEP 3101 - (Advanced String Formatting). */ -PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj, - Py_UNICODE *format_spec, - Py_ssize_t format_spec_len); - -/* --- wchar_t support for platforms which support it --------------------- */ - -#ifdef HAVE_WCHAR_H - -/* Create a Unicode Object from the whcar_t buffer w of the given - size. - - The buffer is copied into the new object. */ - -PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( - register const wchar_t *w, /* wchar_t buffer */ - Py_ssize_t size /* size of buffer */ - ); - -/* Copies the Unicode Object contents into the wchar_t buffer w. At - most size wchar_t characters are copied. - - Note that the resulting wchar_t string may or may not be - 0-terminated. It is the responsibility of the caller to make sure - that the wchar_t string is 0-terminated in case this is required by - the application. - - Returns the number of wchar_t characters copied (excluding a - possibly trailing 0-termination character) or -1 in case of an - error. */ - -PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( - PyUnicodeObject *unicode, /* Unicode object */ - register wchar_t *w, /* wchar_t buffer */ - Py_ssize_t size /* size of buffer */ - ); - -#endif - -/* --- Unicode ordinals --------------------------------------------------- */ - -/* Create a Unicode Object from the given Unicode code point ordinal. - - The ordinal must be in range(0x10000) on narrow Python builds - (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is - raised in case it is not. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); - -/* --- Free-list management ----------------------------------------------- */ - -/* Clear the free list used by the Unicode implementation. - - This can be used to release memory used for objects on the free - list back to the Python memory allocator. - -*/ - -PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); - -/* === Builtin Codecs ===================================================== - - Many of these APIs take two arguments encoding and errors. These - parameters encoding and errors have the same semantics as the ones - of the builtin unicode() API. - - Setting encoding to NULL causes the default encoding to be used. - - Error handling is set by errors which may also be set to NULL - meaning to use the default handling defined for the codec. Default - error handling for all builtin codecs is "strict" (ValueErrors are - raised). - - The codecs all use a similar interface. Only deviation from the - generic ones are documented. - -*/ - -/* --- Manage the default encoding ---------------------------------------- */ - -/* Return a Python string holding the default encoded value of the - Unicode object. - - The resulting string is cached in the Unicode object for subsequent - usage by this function. The cached version is needed to implement - the character buffer interface and will live (at least) as long as - the Unicode object itself. - - The refcount of the string is *not* incremented. - - *** Exported for internal use by the interpreter only !!! *** - -*/ - -PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString( - PyObject *, const char *); - -/* Returns the currently active default encoding. - - The default encoding is currently implemented as run-time settable - process global. This may change in future versions of the - interpreter to become a parameter which is managed on a per-thread - basis. - - */ - -PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); - -/* Sets the currently active default encoding. - - Returns 0 on success, -1 in case of an error. - - */ - -PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding( - const char *encoding /* Encoding name in standard form */ - ); - -/* --- Generic Codecs ----------------------------------------------------- */ - -/* Create a Unicode object by decoding the encoded string s of the - given size. */ - -PyAPI_FUNC(PyObject*) PyUnicode_Decode( - const char *s, /* encoded string */ - Py_ssize_t size, /* size of buffer */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Encodes a Py_UNICODE buffer of the given size and returns a - Python string object. */ - -PyAPI_FUNC(PyObject*) PyUnicode_Encode( - const Py_UNICODE *s, /* Unicode char buffer */ - Py_ssize_t size, /* number of Py_UNICODE chars to encode */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Encodes a Unicode object and returns the result as Python - object. */ - -PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( - PyObject *unicode, /* Unicode object */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -/* Encodes a Unicode object and returns the result as Python string - object. */ - -PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( - PyObject *unicode, /* Unicode object */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( - PyObject* string /* 256 character map */ - ); - - -/* --- UTF-7 Codecs ------------------------------------------------------- */ - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( - const char *string, /* UTF-7 encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors /* error handling */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( - const char *string, /* UTF-7 encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors, /* error handling */ - Py_ssize_t *consumed /* bytes consumed */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* number of Py_UNICODE chars to encode */ - int base64SetO, /* Encode RFC2152 Set O characters in base64 */ - int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ - const char *errors /* error handling */ - ); - -/* --- UTF-8 Codecs ------------------------------------------------------- */ - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( - const char *string, /* UTF-8 encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors /* error handling */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( - const char *string, /* UTF-8 encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors, /* error handling */ - Py_ssize_t *consumed /* bytes consumed */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( - PyObject *unicode /* Unicode object */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* number of Py_UNICODE chars to encode */ - const char *errors /* error handling */ - ); - -/* --- UTF-32 Codecs ------------------------------------------------------ */ - -/* Decodes length bytes from a UTF-32 encoded buffer string and returns - the corresponding Unicode object. - - errors (if non-NULL) defines the error handling. It defaults - to "strict". - - If byteorder is non-NULL, the decoder starts decoding using the - given byte order: - - *byteorder == -1: little endian - *byteorder == 0: native order - *byteorder == 1: big endian - - In native mode, the first four bytes of the stream are checked for a - BOM mark. If found, the BOM mark is analysed, the byte order - adjusted and the BOM skipped. In the other modes, no BOM mark - interpretation is done. After completion, *byteorder is set to the - current byte order at the end of input data. - - If byteorder is NULL, the codec starts in native order mode. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( - const char *string, /* UTF-32 encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors, /* error handling */ - int *byteorder /* pointer to byteorder to use - 0=native;-1=LE,1=BE; updated on - exit */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( - const char *string, /* UTF-32 encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors, /* error handling */ - int *byteorder, /* pointer to byteorder to use - 0=native;-1=LE,1=BE; updated on - exit */ - Py_ssize_t *consumed /* bytes consumed */ - ); - -/* Returns a Python string using the UTF-32 encoding in native byte - order. The string always starts with a BOM mark. */ - -PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( - PyObject *unicode /* Unicode object */ - ); - -/* Returns a Python string object holding the UTF-32 encoded value of - the Unicode data. - - If byteorder is not 0, output is written according to the following - byte order: - - byteorder == -1: little endian - byteorder == 0: native byte order (writes a BOM mark) - byteorder == 1: big endian - - If byteorder is 0, the output string will always start with the - Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is - prepended. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* number of Py_UNICODE chars to encode */ - const char *errors, /* error handling */ - int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ - ); - -/* --- UTF-16 Codecs ------------------------------------------------------ */ - -/* Decodes length bytes from a UTF-16 encoded buffer string and returns - the corresponding Unicode object. - - errors (if non-NULL) defines the error handling. It defaults - to "strict". - - If byteorder is non-NULL, the decoder starts decoding using the - given byte order: - - *byteorder == -1: little endian - *byteorder == 0: native order - *byteorder == 1: big endian - - In native mode, the first two bytes of the stream are checked for a - BOM mark. If found, the BOM mark is analysed, the byte order - adjusted and the BOM skipped. In the other modes, no BOM mark - interpretation is done. After completion, *byteorder is set to the - current byte order at the end of input data. - - If byteorder is NULL, the codec starts in native order mode. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( - const char *string, /* UTF-16 encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors, /* error handling */ - int *byteorder /* pointer to byteorder to use - 0=native;-1=LE,1=BE; updated on - exit */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( - const char *string, /* UTF-16 encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors, /* error handling */ - int *byteorder, /* pointer to byteorder to use - 0=native;-1=LE,1=BE; updated on - exit */ - Py_ssize_t *consumed /* bytes consumed */ - ); - -/* Returns a Python string using the UTF-16 encoding in native byte - order. The string always starts with a BOM mark. */ - -PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( - PyObject *unicode /* Unicode object */ - ); - -/* Returns a Python string object holding the UTF-16 encoded value of - the Unicode data. - - If byteorder is not 0, output is written according to the following - byte order: - - byteorder == -1: little endian - byteorder == 0: native byte order (writes a BOM mark) - byteorder == 1: big endian - - If byteorder is 0, the output string will always start with the - Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is - prepended. - - Note that Py_UNICODE data is being interpreted as UTF-16 reduced to - UCS-2. This trick makes it possible to add full UTF-16 capabilities - at a later point without compromising the APIs. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* number of Py_UNICODE chars to encode */ - const char *errors, /* error handling */ - int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ - ); - -/* --- Unicode-Escape Codecs ---------------------------------------------- */ - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( - const char *string, /* Unicode-Escape encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors /* error handling */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( - PyObject *unicode /* Unicode object */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length /* Number of Py_UNICODE chars to encode */ - ); - -/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( - const char *string, /* Raw-Unicode-Escape encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors /* error handling */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( - PyObject *unicode /* Unicode object */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length /* Number of Py_UNICODE chars to encode */ - ); - -/* --- Unicode Internal Codec --------------------------------------------- - - Only for internal use in _codecsmodule.c */ - -PyObject *_PyUnicode_DecodeUnicodeInternal( - const char *string, - Py_ssize_t length, - const char *errors - ); - -/* --- Latin-1 Codecs ----------------------------------------------------- - - Note: Latin-1 corresponds to the first 256 Unicode ordinals. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( - const char *string, /* Latin-1 encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors /* error handling */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( - PyObject *unicode /* Unicode object */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ - const char *errors /* error handling */ - ); - -/* --- ASCII Codecs ------------------------------------------------------- - - Only 7-bit ASCII data is excepted. All other codes generate errors. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( - const char *string, /* ASCII encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors /* error handling */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( - PyObject *unicode /* Unicode object */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ - const char *errors /* error handling */ - ); - -/* --- Character Map Codecs ----------------------------------------------- - - This codec uses mappings to encode and decode characters. - - Decoding mappings must map single string characters to single - Unicode characters, integers (which are then interpreted as Unicode - ordinals) or None (meaning "undefined mapping" and causing an - error). - - Encoding mappings must map single Unicode characters to single - string characters, integers (which are then interpreted as Latin-1 - ordinals) or None (meaning "undefined mapping" and causing an - error). - - If a character lookup fails with a LookupError, the character is - copied as-is meaning that its ordinal value will be interpreted as - Unicode or Latin-1 ordinal resp. Because of this mappings only need - to contain those mappings which map characters to different code - points. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( - const char *string, /* Encoded string */ - Py_ssize_t length, /* size of string */ - PyObject *mapping, /* character mapping - (char ordinal -> unicode ordinal) */ - const char *errors /* error handling */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( - PyObject *unicode, /* Unicode object */ - PyObject *mapping /* character mapping - (unicode ordinal -> char ordinal) */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ - PyObject *mapping, /* character mapping - (unicode ordinal -> char ordinal) */ - const char *errors /* error handling */ - ); - -/* Translate a Py_UNICODE buffer of the given length by applying a - character mapping table to it and return the resulting Unicode - object. - - The mapping table must map Unicode ordinal integers to Unicode - ordinal integers or None (causing deletion of the character). - - Mapping tables may be dictionaries or sequences. Unmapped character - ordinals (ones which cause a LookupError) are left untouched and - are copied as-is. - -*/ - -PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ - PyObject *table, /* Translate table */ - const char *errors /* error handling */ - ); - -#ifdef MS_WIN32 - -/* --- MBCS codecs for Windows -------------------------------------------- */ - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( - const char *string, /* MBCS encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors /* error handling */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( - const char *string, /* MBCS encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors, /* error handling */ - Py_ssize_t *consumed /* bytes consumed */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( - PyObject *unicode /* Unicode object */ - ); - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ - const char *errors /* error handling */ - ); - -#endif /* MS_WIN32 */ - -/* --- Decimal Encoder ---------------------------------------------------- */ - -/* Takes a Unicode string holding a decimal value and writes it into - an output buffer using standard ASCII digit codes. - - The output buffer has to provide at least length+1 bytes of storage - area. The output string is 0-terminated. - - The encoder converts whitespace to ' ', decimal characters to their - corresponding ASCII digit and all other Latin-1 characters except - \0 as-is. Characters outside this range (Unicode ordinals 1-256) - are treated as errors. This includes embedded NULL bytes. - - Error handling is defined by the errors argument: - - NULL or "strict": raise a ValueError - "ignore": ignore the wrong characters (these are not copied to the - output buffer) - "replace": replaces illegal characters with '?' - - Returns 0 on success, -1 on failure. - -*/ - -PyAPI_FUNC(int) PyUnicode_EncodeDecimal( - Py_UNICODE *s, /* Unicode buffer */ - Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ - char *output, /* Output buffer; must have size >= length */ - const char *errors /* error handling */ - ); - -/* --- Methods & Slots ---------------------------------------------------- - - These are capable of handling Unicode objects and strings on input - (we refer to them as strings in the descriptions) and return - Unicode objects or integers as apporpriate. */ - -/* Concat two strings giving a new Unicode string. */ - -PyAPI_FUNC(PyObject*) PyUnicode_Concat( - PyObject *left, /* Left string */ - PyObject *right /* Right string */ - ); - -/* Split a string giving a list of Unicode strings. - - If sep is NULL, splitting will be done at all whitespace - substrings. Otherwise, splits occur at the given separator. - - At most maxsplit splits will be done. If negative, no limit is set. - - Separators are not included in the resulting list. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_Split( - PyObject *s, /* String to split */ - PyObject *sep, /* String separator */ - Py_ssize_t maxsplit /* Maxsplit count */ - ); - -/* Dito, but split at line breaks. - - CRLF is considered to be one line break. Line breaks are not - included in the resulting list. */ - -PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( - PyObject *s, /* String to split */ - int keepends /* If true, line end markers are included */ - ); - -/* Partition a string using a given separator. */ - -PyAPI_FUNC(PyObject*) PyUnicode_Partition( - PyObject *s, /* String to partition */ - PyObject *sep /* String separator */ - ); - -/* Partition a string using a given separator, searching from the end of the - string. */ - -PyAPI_FUNC(PyObject*) PyUnicode_RPartition( - PyObject *s, /* String to partition */ - PyObject *sep /* String separator */ - ); - -/* Split a string giving a list of Unicode strings. - - If sep is NULL, splitting will be done at all whitespace - substrings. Otherwise, splits occur at the given separator. - - At most maxsplit splits will be done. But unlike PyUnicode_Split - PyUnicode_RSplit splits from the end of the string. If negative, - no limit is set. - - Separators are not included in the resulting list. - -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_RSplit( - PyObject *s, /* String to split */ - PyObject *sep, /* String separator */ - Py_ssize_t maxsplit /* Maxsplit count */ - ); - -/* Translate a string by applying a character mapping table to it and - return the resulting Unicode object. - - The mapping table must map Unicode ordinal integers to Unicode - ordinal integers or None (causing deletion of the character). - - Mapping tables may be dictionaries or sequences. Unmapped character - ordinals (ones which cause a LookupError) are left untouched and - are copied as-is. - -*/ - -PyAPI_FUNC(PyObject *) PyUnicode_Translate( - PyObject *str, /* String */ - PyObject *table, /* Translate table */ - const char *errors /* error handling */ - ); - -/* Join a sequence of strings using the given separator and return - the resulting Unicode string. */ - -PyAPI_FUNC(PyObject*) PyUnicode_Join( - PyObject *separator, /* Separator string */ - PyObject *seq /* Sequence object */ - ); - -/* Return 1 if substr matches str[start:end] at the given tail end, 0 - otherwise. */ - -PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( - PyObject *str, /* String */ - PyObject *substr, /* Prefix or Suffix string */ - Py_ssize_t start, /* Start index */ - Py_ssize_t end, /* Stop index */ - int direction /* Tail end: -1 prefix, +1 suffix */ - ); - -/* Return the first position of substr in str[start:end] using the - given search direction or -1 if not found. -2 is returned in case - an error occurred and an exception is set. */ - -PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( - PyObject *str, /* String */ - PyObject *substr, /* Substring to find */ - Py_ssize_t start, /* Start index */ - Py_ssize_t end, /* Stop index */ - int direction /* Find direction: +1 forward, -1 backward */ - ); - -/* Count the number of occurrences of substr in str[start:end]. */ - -PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( - PyObject *str, /* String */ - PyObject *substr, /* Substring to count */ - Py_ssize_t start, /* Start index */ - Py_ssize_t end /* Stop index */ - ); - -/* Replace at most maxcount occurrences of substr in str with replstr - and return the resulting Unicode object. */ - -PyAPI_FUNC(PyObject *) PyUnicode_Replace( - PyObject *str, /* String */ - PyObject *substr, /* Substring to find */ - PyObject *replstr, /* Substring to replace */ - Py_ssize_t maxcount /* Max. number of replacements to apply; - -1 = all */ - ); - -/* Compare two strings and return -1, 0, 1 for less than, equal, - greater than resp. */ - -PyAPI_FUNC(int) PyUnicode_Compare( - PyObject *left, /* Left string */ - PyObject *right /* Right string */ - ); - -/* Rich compare two strings and return one of the following: - - - NULL in case an exception was raised - - Py_True or Py_False for successfuly comparisons - - Py_NotImplemented in case the type combination is unknown - - Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in - case the conversion of the arguments to Unicode fails with a - UnicodeDecodeError. - - Possible values for op: - - Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE - -*/ - -PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( - PyObject *left, /* Left string */ - PyObject *right, /* Right string */ - int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ - ); - -/* Apply an argument tuple or dictionary to a format string and return - the resulting Unicode string. */ - -PyAPI_FUNC(PyObject *) PyUnicode_Format( - PyObject *format, /* Format string */ - PyObject *args /* Argument tuple or dictionary */ - ); - -/* Checks whether element is contained in container and return 1/0 - accordingly. - - element has to coerce to a one element Unicode string. -1 is - returned in case of an error. */ - -PyAPI_FUNC(int) PyUnicode_Contains( - PyObject *container, /* Container string */ - PyObject *element /* Element string */ - ); - -/* Externally visible for str.strip(unicode) */ -PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( - PyUnicodeObject *self, - int striptype, - PyObject *sepobj - ); - -/* === Characters Type APIs =============================================== */ - -/* Helper array used by Py_UNICODE_ISSPACE(). */ - -PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; - -/* These should not be used directly. Use the Py_UNICODE_IS* and - Py_UNICODE_TO* macros instead. - - These APIs are implemented in Objects/unicodectype.c. - -*/ - -PyAPI_FUNC(int) _PyUnicode_IsLowercase( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsUppercase( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsTitlecase( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsWhitespace( - const Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsLinebreak( - const Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_ToDigit( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(double) _PyUnicode_ToNumeric( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsDigit( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsNumeric( - Py_UNICODE ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsAlpha( - Py_UNICODE ch /* Unicode character */ - ); - -#ifdef __cplusplus -} -#endif -#endif /* Py_USING_UNICODE */ -#endif /* !Py_UNICODEOBJECT_H */
--- a/test/include/python2.7/warnings.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -#ifndef Py_WARNINGS_H -#define Py_WARNINGS_H -#ifdef __cplusplus -extern "C" { -#endif - -PyAPI_FUNC(void) _PyWarnings_Init(void); - -PyAPI_FUNC(int) PyErr_WarnEx(PyObject *, const char *, Py_ssize_t); -PyAPI_FUNC(int) PyErr_WarnExplicit(PyObject *, const char *, const char *, int, - const char *, PyObject *); - -#define PyErr_WarnPy3k(msg, stacklevel) \ - (Py_Py3kWarningFlag ? PyErr_WarnEx(PyExc_DeprecationWarning, msg, stacklevel) : 0) - -/* DEPRECATED: Use PyErr_WarnEx() instead. */ -#define PyErr_Warn(category, msg) PyErr_WarnEx(category, msg, 1) - -#ifdef __cplusplus -} -#endif -#endif /* !Py_WARNINGS_H */ -
--- a/test/include/python2.7/weakrefobject.h Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -/* Weak references objects for Python. */ - -#ifndef Py_WEAKREFOBJECT_H -#define Py_WEAKREFOBJECT_H -#ifdef __cplusplus -extern "C" { -#endif - - -typedef struct _PyWeakReference PyWeakReference; - -/* PyWeakReference is the base struct for the Python ReferenceType, ProxyType, - * and CallableProxyType. - */ -struct _PyWeakReference { - PyObject_HEAD - - /* The object to which this is a weak reference, or Py_None if none. - * Note that this is a stealth reference: wr_object's refcount is - * not incremented to reflect this pointer. - */ - PyObject *wr_object; - - /* A callable to invoke when wr_object dies, or NULL if none. */ - PyObject *wr_callback; - - /* A cache for wr_object's hash code. As usual for hashes, this is -1 - * if the hash code isn't known yet. - */ - long hash; - - /* If wr_object is weakly referenced, wr_object has a doubly-linked NULL- - * terminated list of weak references to it. These are the list pointers. - * If wr_object goes away, wr_object is set to Py_None, and these pointers - * have no meaning then. - */ - PyWeakReference *wr_prev; - PyWeakReference *wr_next; -}; - -PyAPI_DATA(PyTypeObject) _PyWeakref_RefType; -PyAPI_DATA(PyTypeObject) _PyWeakref_ProxyType; -PyAPI_DATA(PyTypeObject) _PyWeakref_CallableProxyType; - -#define PyWeakref_CheckRef(op) PyObject_TypeCheck(op, &_PyWeakref_RefType) -#define PyWeakref_CheckRefExact(op) \ - (Py_TYPE(op) == &_PyWeakref_RefType) -#define PyWeakref_CheckProxy(op) \ - ((Py_TYPE(op) == &_PyWeakref_ProxyType) || \ - (Py_TYPE(op) == &_PyWeakref_CallableProxyType)) - -#define PyWeakref_Check(op) \ - (PyWeakref_CheckRef(op) || PyWeakref_CheckProxy(op)) - - -PyAPI_FUNC(PyObject *) PyWeakref_NewRef(PyObject *ob, - PyObject *callback); -PyAPI_FUNC(PyObject *) PyWeakref_NewProxy(PyObject *ob, - PyObject *callback); -PyAPI_FUNC(PyObject *) PyWeakref_GetObject(PyObject *ref); - -PyAPI_FUNC(Py_ssize_t) _PyWeakref_GetWeakrefCount(PyWeakReference *head); - -PyAPI_FUNC(void) _PyWeakref_ClearRef(PyWeakReference *self); - -/* Explanation for the Py_REFCNT() check: when a weakref's target is part - of a long chain of deallocations which triggers the trashcan mechanism, - clearing the weakrefs can be delayed long after the target's refcount - has dropped to zero. In the meantime, code accessing the weakref will - be able to "see" the target object even though it is supposed to be - unreachable. See issue #16602. */ - -#define PyWeakref_GET_OBJECT(ref) \ - (Py_REFCNT(((PyWeakReference *)(ref))->wr_object) > 0 \ - ? ((PyWeakReference *)(ref))->wr_object \ - : Py_None) - - -#ifdef __cplusplus -} -#endif -#endif /* !Py_WEAKREFOBJECT_H */
--- a/test/lib/python2.7/UserDict.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,213 +0,0 @@ -"""A more or less complete user-defined wrapper around dictionary objects.""" - -class UserDict: - def __init__(*args, **kwargs): - if not args: - raise TypeError("descriptor '__init__' of 'UserDict' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - if args: - dict = args[0] - elif 'dict' in kwargs: - dict = kwargs.pop('dict') - import warnings - warnings.warn("Passing 'dict' as keyword argument is " - "deprecated", PendingDeprecationWarning, - stacklevel=2) - else: - dict = None - self.data = {} - if dict is not None: - self.update(dict) - if len(kwargs): - self.update(kwargs) - def __repr__(self): return repr(self.data) - def __cmp__(self, dict): - if isinstance(dict, UserDict): - return cmp(self.data, dict.data) - else: - return cmp(self.data, dict) - __hash__ = None # Avoid Py3k warning - def __len__(self): return len(self.data) - def __getitem__(self, key): - if key in self.data: - return self.data[key] - if hasattr(self.__class__, "__missing__"): - return self.__class__.__missing__(self, key) - raise KeyError(key) - def __setitem__(self, key, item): self.data[key] = item - def __delitem__(self, key): del self.data[key] - def clear(self): self.data.clear() - def copy(self): - if self.__class__ is UserDict: - return UserDict(self.data.copy()) - import copy - data = self.data - try: - self.data = {} - c = copy.copy(self) - finally: - self.data = data - c.update(self) - return c - def keys(self): return self.data.keys() - def items(self): return self.data.items() - def iteritems(self): return self.data.iteritems() - def iterkeys(self): return self.data.iterkeys() - def itervalues(self): return self.data.itervalues() - def values(self): return self.data.values() - def has_key(self, key): return key in self.data - def update(*args, **kwargs): - if not args: - raise TypeError("descriptor 'update' of 'UserDict' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - if args: - dict = args[0] - elif 'dict' in kwargs: - dict = kwargs.pop('dict') - import warnings - warnings.warn("Passing 'dict' as keyword argument is deprecated", - PendingDeprecationWarning, stacklevel=2) - else: - dict = None - if dict is None: - pass - elif isinstance(dict, UserDict): - self.data.update(dict.data) - elif isinstance(dict, type({})) or not hasattr(dict, 'items'): - self.data.update(dict) - else: - for k, v in dict.items(): - self[k] = v - if len(kwargs): - self.data.update(kwargs) - def get(self, key, failobj=None): - if key not in self: - return failobj - return self[key] - def setdefault(self, key, failobj=None): - if key not in self: - self[key] = failobj - return self[key] - def pop(self, key, *args): - return self.data.pop(key, *args) - def popitem(self): - return self.data.popitem() - def __contains__(self, key): - return key in self.data - @classmethod - def fromkeys(cls, iterable, value=None): - d = cls() - for key in iterable: - d[key] = value - return d - -class IterableUserDict(UserDict): - def __iter__(self): - return iter(self.data) - -import _abcoll -_abcoll.MutableMapping.register(IterableUserDict) - - -class DictMixin: - # Mixin defining all dictionary methods for classes that already have - # a minimum dictionary interface including getitem, setitem, delitem, - # and keys. Without knowledge of the subclass constructor, the mixin - # does not define __init__() or copy(). In addition to the four base - # methods, progressively more efficiency comes with defining - # __contains__(), __iter__(), and iteritems(). - - # second level definitions support higher levels - def __iter__(self): - for k in self.keys(): - yield k - def has_key(self, key): - try: - self[key] - except KeyError: - return False - return True - def __contains__(self, key): - return self.has_key(key) - - # third level takes advantage of second level definitions - def iteritems(self): - for k in self: - yield (k, self[k]) - def iterkeys(self): - return self.__iter__() - - # fourth level uses definitions from lower levels - def itervalues(self): - for _, v in self.iteritems(): - yield v - def values(self): - return [v for _, v in self.iteritems()] - def items(self): - return list(self.iteritems()) - def clear(self): - for key in self.keys(): - del self[key] - def setdefault(self, key, default=None): - try: - return self[key] - except KeyError: - self[key] = default - return default - def pop(self, key, *args): - if len(args) > 1: - raise TypeError, "pop expected at most 2 arguments, got "\ - + repr(1 + len(args)) - try: - value = self[key] - except KeyError: - if args: - return args[0] - raise - del self[key] - return value - def popitem(self): - try: - k, v = self.iteritems().next() - except StopIteration: - raise KeyError, 'container is empty' - del self[k] - return (k, v) - def update(self, other=None, **kwargs): - # Make progressively weaker assumptions about "other" - if other is None: - pass - elif hasattr(other, 'iteritems'): # iteritems saves memory and lookups - for k, v in other.iteritems(): - self[k] = v - elif hasattr(other, 'keys'): - for k in other.keys(): - self[k] = other[k] - else: - for k, v in other: - self[k] = v - if kwargs: - self.update(kwargs) - def get(self, key, default=None): - try: - return self[key] - except KeyError: - return default - def __repr__(self): - return repr(dict(self.iteritems())) - def __cmp__(self, other): - if other is None: - return 1 - if isinstance(other, DictMixin): - other = dict(other.iteritems()) - return cmp(dict(self.iteritems()), other) - def __len__(self): - return len(self.keys())
--- a/test/lib/python2.7/_abcoll.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,695 +0,0 @@ -# Copyright 2007 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Abstract Base Classes (ABCs) for collections, according to PEP 3119. - -DON'T USE THIS MODULE DIRECTLY! The classes here should be imported -via collections; they are defined here only to alleviate certain -bootstrapping issues. Unit tests are in test_collections. -""" - -from abc import ABCMeta, abstractmethod -import sys - -__all__ = ["Hashable", "Iterable", "Iterator", - "Sized", "Container", "Callable", - "Set", "MutableSet", - "Mapping", "MutableMapping", - "MappingView", "KeysView", "ItemsView", "ValuesView", - "Sequence", "MutableSequence", - ] - -### ONE-TRICK PONIES ### - -def _hasattr(C, attr): - try: - return any(attr in B.__dict__ for B in C.__mro__) - except AttributeError: - # Old-style class - return hasattr(C, attr) - - -class Hashable: - __metaclass__ = ABCMeta - - @abstractmethod - def __hash__(self): - return 0 - - @classmethod - def __subclasshook__(cls, C): - if cls is Hashable: - try: - for B in C.__mro__: - if "__hash__" in B.__dict__: - if B.__dict__["__hash__"]: - return True - break - except AttributeError: - # Old-style class - if getattr(C, "__hash__", None): - return True - return NotImplemented - - -class Iterable: - __metaclass__ = ABCMeta - - @abstractmethod - def __iter__(self): - while False: - yield None - - @classmethod - def __subclasshook__(cls, C): - if cls is Iterable: - if _hasattr(C, "__iter__"): - return True - return NotImplemented - -Iterable.register(str) - - -class Iterator(Iterable): - - @abstractmethod - def next(self): - 'Return the next item from the iterator. When exhausted, raise StopIteration' - raise StopIteration - - def __iter__(self): - return self - - @classmethod - def __subclasshook__(cls, C): - if cls is Iterator: - if _hasattr(C, "next") and _hasattr(C, "__iter__"): - return True - return NotImplemented - - -class Sized: - __metaclass__ = ABCMeta - - @abstractmethod - def __len__(self): - return 0 - - @classmethod - def __subclasshook__(cls, C): - if cls is Sized: - if _hasattr(C, "__len__"): - return True - return NotImplemented - - -class Container: - __metaclass__ = ABCMeta - - @abstractmethod - def __contains__(self, x): - return False - - @classmethod - def __subclasshook__(cls, C): - if cls is Container: - if _hasattr(C, "__contains__"): - return True - return NotImplemented - - -class Callable: - __metaclass__ = ABCMeta - - @abstractmethod - def __call__(self, *args, **kwds): - return False - - @classmethod - def __subclasshook__(cls, C): - if cls is Callable: - if _hasattr(C, "__call__"): - return True - return NotImplemented - - -### SETS ### - - -class Set(Sized, Iterable, Container): - """A set is a finite, iterable container. - - This class provides concrete generic implementations of all - methods except for __contains__, __iter__ and __len__. - - To override the comparisons (presumably for speed, as the - semantics are fixed), redefine __le__ and __ge__, - then the other operations will automatically follow suit. - """ - - def __le__(self, other): - if not isinstance(other, Set): - return NotImplemented - if len(self) > len(other): - return False - for elem in self: - if elem not in other: - return False - return True - - def __lt__(self, other): - if not isinstance(other, Set): - return NotImplemented - return len(self) < len(other) and self.__le__(other) - - def __gt__(self, other): - if not isinstance(other, Set): - return NotImplemented - return len(self) > len(other) and self.__ge__(other) - - def __ge__(self, other): - if not isinstance(other, Set): - return NotImplemented - if len(self) < len(other): - return False - for elem in other: - if elem not in self: - return False - return True - - def __eq__(self, other): - if not isinstance(other, Set): - return NotImplemented - return len(self) == len(other) and self.__le__(other) - - def __ne__(self, other): - return not (self == other) - - @classmethod - def _from_iterable(cls, it): - '''Construct an instance of the class from any iterable input. - - Must override this method if the class constructor signature - does not accept an iterable for an input. - ''' - return cls(it) - - def __and__(self, other): - if not isinstance(other, Iterable): - return NotImplemented - return self._from_iterable(value for value in other if value in self) - - __rand__ = __and__ - - def isdisjoint(self, other): - 'Return True if two sets have a null intersection.' - for value in other: - if value in self: - return False - return True - - def __or__(self, other): - if not isinstance(other, Iterable): - return NotImplemented - chain = (e for s in (self, other) for e in s) - return self._from_iterable(chain) - - __ror__ = __or__ - - def __sub__(self, other): - if not isinstance(other, Set): - if not isinstance(other, Iterable): - return NotImplemented - other = self._from_iterable(other) - return self._from_iterable(value for value in self - if value not in other) - - def __rsub__(self, other): - if not isinstance(other, Set): - if not isinstance(other, Iterable): - return NotImplemented - other = self._from_iterable(other) - return self._from_iterable(value for value in other - if value not in self) - - def __xor__(self, other): - if not isinstance(other, Set): - if not isinstance(other, Iterable): - return NotImplemented - other = self._from_iterable(other) - return (self - other) | (other - self) - - __rxor__ = __xor__ - - # Sets are not hashable by default, but subclasses can change this - __hash__ = None - - def _hash(self): - """Compute the hash value of a set. - - Note that we don't define __hash__: not all sets are hashable. - But if you define a hashable set type, its __hash__ should - call this function. - - This must be compatible __eq__. - - All sets ought to compare equal if they contain the same - elements, regardless of how they are implemented, and - regardless of the order of the elements; so there's not much - freedom for __eq__ or __hash__. We match the algorithm used - by the built-in frozenset type. - """ - MAX = sys.maxint - MASK = 2 * MAX + 1 - n = len(self) - h = 1927868237 * (n + 1) - h &= MASK - for x in self: - hx = hash(x) - h ^= (hx ^ (hx << 16) ^ 89869747) * 3644798167 - h &= MASK - h = h * 69069 + 907133923 - h &= MASK - if h > MAX: - h -= MASK + 1 - if h == -1: - h = 590923713 - return h - -Set.register(frozenset) - - -class MutableSet(Set): - """A mutable set is a finite, iterable container. - - This class provides concrete generic implementations of all - methods except for __contains__, __iter__, __len__, - add(), and discard(). - - To override the comparisons (presumably for speed, as the - semantics are fixed), all you have to do is redefine __le__ and - then the other operations will automatically follow suit. - """ - - @abstractmethod - def add(self, value): - """Add an element.""" - raise NotImplementedError - - @abstractmethod - def discard(self, value): - """Remove an element. Do not raise an exception if absent.""" - raise NotImplementedError - - def remove(self, value): - """Remove an element. If not a member, raise a KeyError.""" - if value not in self: - raise KeyError(value) - self.discard(value) - - def pop(self): - """Return the popped value. Raise KeyError if empty.""" - it = iter(self) - try: - value = next(it) - except StopIteration: - raise KeyError - self.discard(value) - return value - - def clear(self): - """This is slow (creates N new iterators!) but effective.""" - try: - while True: - self.pop() - except KeyError: - pass - - def __ior__(self, it): - for value in it: - self.add(value) - return self - - def __iand__(self, it): - for value in (self - it): - self.discard(value) - return self - - def __ixor__(self, it): - if it is self: - self.clear() - else: - if not isinstance(it, Set): - it = self._from_iterable(it) - for value in it: - if value in self: - self.discard(value) - else: - self.add(value) - return self - - def __isub__(self, it): - if it is self: - self.clear() - else: - for value in it: - self.discard(value) - return self - -MutableSet.register(set) - - -### MAPPINGS ### - - -class Mapping(Sized, Iterable, Container): - - """A Mapping is a generic container for associating key/value - pairs. - - This class provides concrete generic implementations of all - methods except for __getitem__, __iter__, and __len__. - - """ - - @abstractmethod - def __getitem__(self, key): - raise KeyError - - def get(self, key, default=None): - 'D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None.' - try: - return self[key] - except KeyError: - return default - - def __contains__(self, key): - try: - self[key] - except KeyError: - return False - else: - return True - - def iterkeys(self): - 'D.iterkeys() -> an iterator over the keys of D' - return iter(self) - - def itervalues(self): - 'D.itervalues() -> an iterator over the values of D' - for key in self: - yield self[key] - - def iteritems(self): - 'D.iteritems() -> an iterator over the (key, value) items of D' - for key in self: - yield (key, self[key]) - - def keys(self): - "D.keys() -> list of D's keys" - return list(self) - - def items(self): - "D.items() -> list of D's (key, value) pairs, as 2-tuples" - return [(key, self[key]) for key in self] - - def values(self): - "D.values() -> list of D's values" - return [self[key] for key in self] - - # Mappings are not hashable by default, but subclasses can change this - __hash__ = None - - def __eq__(self, other): - if not isinstance(other, Mapping): - return NotImplemented - return dict(self.items()) == dict(other.items()) - - def __ne__(self, other): - return not (self == other) - -class MappingView(Sized): - - def __init__(self, mapping): - self._mapping = mapping - - def __len__(self): - return len(self._mapping) - - def __repr__(self): - return '{0.__class__.__name__}({0._mapping!r})'.format(self) - - -class KeysView(MappingView, Set): - - @classmethod - def _from_iterable(self, it): - return set(it) - - def __contains__(self, key): - return key in self._mapping - - def __iter__(self): - for key in self._mapping: - yield key - -KeysView.register(type({}.viewkeys())) - -class ItemsView(MappingView, Set): - - @classmethod - def _from_iterable(self, it): - return set(it) - - def __contains__(self, item): - key, value = item - try: - v = self._mapping[key] - except KeyError: - return False - else: - return v == value - - def __iter__(self): - for key in self._mapping: - yield (key, self._mapping[key]) - -ItemsView.register(type({}.viewitems())) - -class ValuesView(MappingView): - - def __contains__(self, value): - for key in self._mapping: - if value == self._mapping[key]: - return True - return False - - def __iter__(self): - for key in self._mapping: - yield self._mapping[key] - -ValuesView.register(type({}.viewvalues())) - -class MutableMapping(Mapping): - - """A MutableMapping is a generic container for associating - key/value pairs. - - This class provides concrete generic implementations of all - methods except for __getitem__, __setitem__, __delitem__, - __iter__, and __len__. - - """ - - @abstractmethod - def __setitem__(self, key, value): - raise KeyError - - @abstractmethod - def __delitem__(self, key): - raise KeyError - - __marker = object() - - def pop(self, key, default=__marker): - '''D.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised. - ''' - try: - value = self[key] - except KeyError: - if default is self.__marker: - raise - return default - else: - del self[key] - return value - - def popitem(self): - '''D.popitem() -> (k, v), remove and return some (key, value) pair - as a 2-tuple; but raise KeyError if D is empty. - ''' - try: - key = next(iter(self)) - except StopIteration: - raise KeyError - value = self[key] - del self[key] - return key, value - - def clear(self): - 'D.clear() -> None. Remove all items from D.' - try: - while True: - self.popitem() - except KeyError: - pass - - def update(*args, **kwds): - ''' D.update([E, ]**F) -> None. Update D from mapping/iterable E and F. - If E present and has a .keys() method, does: for k in E: D[k] = E[k] - If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v - In either case, this is followed by: for k, v in F.items(): D[k] = v - ''' - if not args: - raise TypeError("descriptor 'update' of 'MutableMapping' object " - "needs an argument") - self = args[0] - args = args[1:] - if len(args) > 1: - raise TypeError('update expected at most 1 arguments, got %d' % - len(args)) - if args: - other = args[0] - if isinstance(other, Mapping): - for key in other: - self[key] = other[key] - elif hasattr(other, "keys"): - for key in other.keys(): - self[key] = other[key] - else: - for key, value in other: - self[key] = value - for key, value in kwds.items(): - self[key] = value - - def setdefault(self, key, default=None): - 'D.setdefault(k[,d]) -> D.get(k,d), also set D[k]=d if k not in D' - try: - return self[key] - except KeyError: - self[key] = default - return default - -MutableMapping.register(dict) - - -### SEQUENCES ### - - -class Sequence(Sized, Iterable, Container): - """All the operations on a read-only sequence. - - Concrete subclasses must override __new__ or __init__, - __getitem__, and __len__. - """ - - @abstractmethod - def __getitem__(self, index): - raise IndexError - - def __iter__(self): - i = 0 - try: - while True: - v = self[i] - yield v - i += 1 - except IndexError: - return - - def __contains__(self, value): - for v in self: - if v == value: - return True - return False - - def __reversed__(self): - for i in reversed(range(len(self))): - yield self[i] - - def index(self, value): - '''S.index(value) -> integer -- return first index of value. - Raises ValueError if the value is not present. - ''' - for i, v in enumerate(self): - if v == value: - return i - raise ValueError - - def count(self, value): - 'S.count(value) -> integer -- return number of occurrences of value' - return sum(1 for v in self if v == value) - -Sequence.register(tuple) -Sequence.register(basestring) -Sequence.register(buffer) -Sequence.register(xrange) - - -class MutableSequence(Sequence): - - """All the operations on a read-only sequence. - - Concrete subclasses must provide __new__ or __init__, - __getitem__, __setitem__, __delitem__, __len__, and insert(). - - """ - - @abstractmethod - def __setitem__(self, index, value): - raise IndexError - - @abstractmethod - def __delitem__(self, index): - raise IndexError - - @abstractmethod - def insert(self, index, value): - 'S.insert(index, object) -- insert object before index' - raise IndexError - - def append(self, value): - 'S.append(object) -- append object to the end of the sequence' - self.insert(len(self), value) - - def reverse(self): - 'S.reverse() -- reverse *IN PLACE*' - n = len(self) - for i in range(n//2): - self[i], self[n-i-1] = self[n-i-1], self[i] - - def extend(self, values): - 'S.extend(iterable) -- extend sequence by appending elements from the iterable' - for v in values: - self.append(v) - - def pop(self, index=-1): - '''S.pop([index]) -> item -- remove and return item at index (default last). - Raise IndexError if list is empty or index is out of range. - ''' - v = self[index] - del self[index] - return v - - def remove(self, value): - '''S.remove(value) -- remove first occurrence of value. - Raise ValueError if the value is not present. - ''' - del self[self.index(value)] - - def __iadd__(self, values): - self.extend(values) - return self - -MutableSequence.register(list)
--- a/test/lib/python2.7/_weakrefset.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,204 +0,0 @@ -# Access WeakSet through the weakref module. -# This code is separated-out because it is needed -# by abc.py to load everything else at startup. - -from _weakref import ref - -__all__ = ['WeakSet'] - - -class _IterationGuard(object): - # This context manager registers itself in the current iterators of the - # weak container, such as to delay all removals until the context manager - # exits. - # This technique should be relatively thread-safe (since sets are). - - def __init__(self, weakcontainer): - # Don't create cycles - self.weakcontainer = ref(weakcontainer) - - def __enter__(self): - w = self.weakcontainer() - if w is not None: - w._iterating.add(self) - return self - - def __exit__(self, e, t, b): - w = self.weakcontainer() - if w is not None: - s = w._iterating - s.remove(self) - if not s: - w._commit_removals() - - -class WeakSet(object): - def __init__(self, data=None): - self.data = set() - def _remove(item, selfref=ref(self)): - self = selfref() - if self is not None: - if self._iterating: - self._pending_removals.append(item) - else: - self.data.discard(item) - self._remove = _remove - # A list of keys to be removed - self._pending_removals = [] - self._iterating = set() - if data is not None: - self.update(data) - - def _commit_removals(self): - l = self._pending_removals - discard = self.data.discard - while l: - discard(l.pop()) - - def __iter__(self): - with _IterationGuard(self): - for itemref in self.data: - item = itemref() - if item is not None: - # Caveat: the iterator will keep a strong reference to - # `item` until it is resumed or closed. - yield item - - def __len__(self): - return len(self.data) - len(self._pending_removals) - - def __contains__(self, item): - try: - wr = ref(item) - except TypeError: - return False - return wr in self.data - - def __reduce__(self): - return (self.__class__, (list(self),), - getattr(self, '__dict__', None)) - - __hash__ = None - - def add(self, item): - if self._pending_removals: - self._commit_removals() - self.data.add(ref(item, self._remove)) - - def clear(self): - if self._pending_removals: - self._commit_removals() - self.data.clear() - - def copy(self): - return self.__class__(self) - - def pop(self): - if self._pending_removals: - self._commit_removals() - while True: - try: - itemref = self.data.pop() - except KeyError: - raise KeyError('pop from empty WeakSet') - item = itemref() - if item is not None: - return item - - def remove(self, item): - if self._pending_removals: - self._commit_removals() - self.data.remove(ref(item)) - - def discard(self, item): - if self._pending_removals: - self._commit_removals() - self.data.discard(ref(item)) - - def update(self, other): - if self._pending_removals: - self._commit_removals() - for element in other: - self.add(element) - - def __ior__(self, other): - self.update(other) - return self - - def difference(self, other): - newset = self.copy() - newset.difference_update(other) - return newset - __sub__ = difference - - def difference_update(self, other): - self.__isub__(other) - def __isub__(self, other): - if self._pending_removals: - self._commit_removals() - if self is other: - self.data.clear() - else: - self.data.difference_update(ref(item) for item in other) - return self - - def intersection(self, other): - return self.__class__(item for item in other if item in self) - __and__ = intersection - - def intersection_update(self, other): - self.__iand__(other) - def __iand__(self, other): - if self._pending_removals: - self._commit_removals() - self.data.intersection_update(ref(item) for item in other) - return self - - def issubset(self, other): - return self.data.issubset(ref(item) for item in other) - __le__ = issubset - - def __lt__(self, other): - return self.data < set(ref(item) for item in other) - - def issuperset(self, other): - return self.data.issuperset(ref(item) for item in other) - __ge__ = issuperset - - def __gt__(self, other): - return self.data > set(ref(item) for item in other) - - def __eq__(self, other): - if not isinstance(other, self.__class__): - return NotImplemented - return self.data == set(ref(item) for item in other) - - def __ne__(self, other): - opposite = self.__eq__(other) - if opposite is NotImplemented: - return NotImplemented - return not opposite - - def symmetric_difference(self, other): - newset = self.copy() - newset.symmetric_difference_update(other) - return newset - __xor__ = symmetric_difference - - def symmetric_difference_update(self, other): - self.__ixor__(other) - def __ixor__(self, other): - if self._pending_removals: - self._commit_removals() - if self is other: - self.data.clear() - else: - self.data.symmetric_difference_update(ref(item, self._remove) for item in other) - return self - - def union(self, other): - return self.__class__(e for s in (self, other) for e in s) - __or__ = union - - def isdisjoint(self, other): - return len(self.intersection(other)) == 0
--- a/test/lib/python2.7/abc.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,185 +0,0 @@ -# Copyright 2007 Google, Inc. All Rights Reserved. -# Licensed to PSF under a Contributor Agreement. - -"""Abstract Base Classes (ABCs) according to PEP 3119.""" - -import types - -from _weakrefset import WeakSet - -# Instance of old-style class -class _C: pass -_InstanceType = type(_C()) - - -def abstractmethod(funcobj): - """A decorator indicating abstract methods. - - Requires that the metaclass is ABCMeta or derived from it. A - class that has a metaclass derived from ABCMeta cannot be - instantiated unless all of its abstract methods are overridden. - The abstract methods can be called using any of the normal - 'super' call mechanisms. - - Usage: - - class C: - __metaclass__ = ABCMeta - @abstractmethod - def my_abstract_method(self, ...): - ... - """ - funcobj.__isabstractmethod__ = True - return funcobj - - -class abstractproperty(property): - """A decorator indicating abstract properties. - - Requires that the metaclass is ABCMeta or derived from it. A - class that has a metaclass derived from ABCMeta cannot be - instantiated unless all of its abstract properties are overridden. - The abstract properties can be called using any of the normal - 'super' call mechanisms. - - Usage: - - class C: - __metaclass__ = ABCMeta - @abstractproperty - def my_abstract_property(self): - ... - - This defines a read-only property; you can also define a read-write - abstract property using the 'long' form of property declaration: - - class C: - __metaclass__ = ABCMeta - def getx(self): ... - def setx(self, value): ... - x = abstractproperty(getx, setx) - """ - __isabstractmethod__ = True - - -class ABCMeta(type): - - """Metaclass for defining Abstract Base Classes (ABCs). - - Use this metaclass to create an ABC. An ABC can be subclassed - directly, and then acts as a mix-in class. You can also register - unrelated concrete classes (even built-in classes) and unrelated - ABCs as 'virtual subclasses' -- these and their descendants will - be considered subclasses of the registering ABC by the built-in - issubclass() function, but the registering ABC won't show up in - their MRO (Method Resolution Order) nor will method - implementations defined by the registering ABC be callable (not - even via super()). - - """ - - # A global counter that is incremented each time a class is - # registered as a virtual subclass of anything. It forces the - # negative cache to be cleared before its next use. - _abc_invalidation_counter = 0 - - def __new__(mcls, name, bases, namespace): - cls = super(ABCMeta, mcls).__new__(mcls, name, bases, namespace) - # Compute set of abstract method names - abstracts = set(name - for name, value in namespace.items() - if getattr(value, "__isabstractmethod__", False)) - for base in bases: - for name in getattr(base, "__abstractmethods__", set()): - value = getattr(cls, name, None) - if getattr(value, "__isabstractmethod__", False): - abstracts.add(name) - cls.__abstractmethods__ = frozenset(abstracts) - # Set up inheritance registry - cls._abc_registry = WeakSet() - cls._abc_cache = WeakSet() - cls._abc_negative_cache = WeakSet() - cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter - return cls - - def register(cls, subclass): - """Register a virtual subclass of an ABC.""" - if not isinstance(subclass, (type, types.ClassType)): - raise TypeError("Can only register classes") - if issubclass(subclass, cls): - return # Already a subclass - # Subtle: test for cycles *after* testing for "already a subclass"; - # this means we allow X.register(X) and interpret it as a no-op. - if issubclass(cls, subclass): - # This would create a cycle, which is bad for the algorithm below - raise RuntimeError("Refusing to create an inheritance cycle") - cls._abc_registry.add(subclass) - ABCMeta._abc_invalidation_counter += 1 # Invalidate negative cache - - def _dump_registry(cls, file=None): - """Debug helper to print the ABC registry.""" - print >> file, "Class: %s.%s" % (cls.__module__, cls.__name__) - print >> file, "Inv.counter: %s" % ABCMeta._abc_invalidation_counter - for name in sorted(cls.__dict__.keys()): - if name.startswith("_abc_"): - value = getattr(cls, name) - print >> file, "%s: %r" % (name, value) - - def __instancecheck__(cls, instance): - """Override for isinstance(instance, cls).""" - # Inline the cache checking when it's simple. - subclass = getattr(instance, '__class__', None) - if subclass is not None and subclass in cls._abc_cache: - return True - subtype = type(instance) - # Old-style instances - if subtype is _InstanceType: - subtype = subclass - if subtype is subclass or subclass is None: - if (cls._abc_negative_cache_version == - ABCMeta._abc_invalidation_counter and - subtype in cls._abc_negative_cache): - return False - # Fall back to the subclass check. - return cls.__subclasscheck__(subtype) - return (cls.__subclasscheck__(subclass) or - cls.__subclasscheck__(subtype)) - - def __subclasscheck__(cls, subclass): - """Override for issubclass(subclass, cls).""" - # Check cache - if subclass in cls._abc_cache: - return True - # Check negative cache; may have to invalidate - if cls._abc_negative_cache_version < ABCMeta._abc_invalidation_counter: - # Invalidate the negative cache - cls._abc_negative_cache = WeakSet() - cls._abc_negative_cache_version = ABCMeta._abc_invalidation_counter - elif subclass in cls._abc_negative_cache: - return False - # Check the subclass hook - ok = cls.__subclasshook__(subclass) - if ok is not NotImplemented: - assert isinstance(ok, bool) - if ok: - cls._abc_cache.add(subclass) - else: - cls._abc_negative_cache.add(subclass) - return ok - # Check if it's a direct subclass - if cls in getattr(subclass, '__mro__', ()): - cls._abc_cache.add(subclass) - return True - # Check if it's a subclass of a registered class (recursive) - for rcls in cls._abc_registry: - if issubclass(subclass, rcls): - cls._abc_cache.add(subclass) - return True - # Check if it's a subclass of a subclass (recursive) - for scls in cls.__subclasses__(): - if issubclass(subclass, scls): - cls._abc_cache.add(subclass) - return True - # No dice; update negative cache - cls._abc_negative_cache.add(subclass) - return False
--- a/test/lib/python2.7/codecs.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1113 +0,0 @@ -""" codecs -- Python Codec Registry, API and helpers. - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -"""#" - -import __builtin__, sys - -### Registry and builtin stateless codec functions - -try: - from _codecs import * -except ImportError, why: - raise SystemError('Failed to load the builtin codecs: %s' % why) - -__all__ = ["register", "lookup", "open", "EncodedFile", "BOM", "BOM_BE", - "BOM_LE", "BOM32_BE", "BOM32_LE", "BOM64_BE", "BOM64_LE", - "BOM_UTF8", "BOM_UTF16", "BOM_UTF16_LE", "BOM_UTF16_BE", - "BOM_UTF32", "BOM_UTF32_LE", "BOM_UTF32_BE", - "CodecInfo", "Codec", "IncrementalEncoder", "IncrementalDecoder", - "StreamReader", "StreamWriter", - "StreamReaderWriter", "StreamRecoder", - "getencoder", "getdecoder", "getincrementalencoder", - "getincrementaldecoder", "getreader", "getwriter", - "encode", "decode", "iterencode", "iterdecode", - "strict_errors", "ignore_errors", "replace_errors", - "xmlcharrefreplace_errors", "backslashreplace_errors", - "register_error", "lookup_error"] - -### Constants - -# -# Byte Order Mark (BOM = ZERO WIDTH NO-BREAK SPACE = U+FEFF) -# and its possible byte string values -# for UTF8/UTF16/UTF32 output and little/big endian machines -# - -# UTF-8 -BOM_UTF8 = '\xef\xbb\xbf' - -# UTF-16, little endian -BOM_LE = BOM_UTF16_LE = '\xff\xfe' - -# UTF-16, big endian -BOM_BE = BOM_UTF16_BE = '\xfe\xff' - -# UTF-32, little endian -BOM_UTF32_LE = '\xff\xfe\x00\x00' - -# UTF-32, big endian -BOM_UTF32_BE = '\x00\x00\xfe\xff' - -if sys.byteorder == 'little': - - # UTF-16, native endianness - BOM = BOM_UTF16 = BOM_UTF16_LE - - # UTF-32, native endianness - BOM_UTF32 = BOM_UTF32_LE - -else: - - # UTF-16, native endianness - BOM = BOM_UTF16 = BOM_UTF16_BE - - # UTF-32, native endianness - BOM_UTF32 = BOM_UTF32_BE - -# Old broken names (don't use in new code) -BOM32_LE = BOM_UTF16_LE -BOM32_BE = BOM_UTF16_BE -BOM64_LE = BOM_UTF32_LE -BOM64_BE = BOM_UTF32_BE - - -### Codec base classes (defining the API) - -class CodecInfo(tuple): - """Codec details when looking up the codec registry""" - - # Private API to allow Python to blacklist the known non-Unicode - # codecs in the standard library. A more general mechanism to - # reliably distinguish test encodings from other codecs will hopefully - # be defined for Python 3.5 - # - # See http://bugs.python.org/issue19619 - _is_text_encoding = True # Assume codecs are text encodings by default - - def __new__(cls, encode, decode, streamreader=None, streamwriter=None, - incrementalencoder=None, incrementaldecoder=None, name=None, - _is_text_encoding=None): - self = tuple.__new__(cls, (encode, decode, streamreader, streamwriter)) - self.name = name - self.encode = encode - self.decode = decode - self.incrementalencoder = incrementalencoder - self.incrementaldecoder = incrementaldecoder - self.streamwriter = streamwriter - self.streamreader = streamreader - if _is_text_encoding is not None: - self._is_text_encoding = _is_text_encoding - return self - - def __repr__(self): - return "<%s.%s object for encoding %s at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self)) - -class Codec: - - """ Defines the interface for stateless encoders/decoders. - - The .encode()/.decode() methods may use different error - handling schemes by providing the errors argument. These - string values are predefined: - - 'strict' - raise a ValueError error (or a subclass) - 'ignore' - ignore the character and continue with the next - 'replace' - replace with a suitable replacement character; - Python will use the official U+FFFD REPLACEMENT - CHARACTER for the builtin Unicode codecs on - decoding and '?' on encoding. - 'xmlcharrefreplace' - Replace with the appropriate XML - character reference (only for encoding). - 'backslashreplace' - Replace with backslashed escape sequences - (only for encoding). - - The set of allowed values can be extended via register_error. - - """ - def encode(self, input, errors='strict'): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling. - - The method may not store state in the Codec instance. Use - StreamWriter for codecs which have to keep state in order to - make encoding efficient. - - The encoder must be able to handle zero length input and - return an empty object of the output object type in this - situation. - - """ - raise NotImplementedError - - def decode(self, input, errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling. - - The method may not store state in the Codec instance. Use - StreamReader for codecs which have to keep state in order to - make decoding efficient. - - The decoder must be able to handle zero length input and - return an empty object of the output object type in this - situation. - - """ - raise NotImplementedError - -class IncrementalEncoder(object): - """ - An IncrementalEncoder encodes an input in multiple steps. The input can be - passed piece by piece to the encode() method. The IncrementalEncoder remembers - the state of the Encoding process between calls to encode(). - """ - def __init__(self, errors='strict'): - """ - Creates an IncrementalEncoder instance. - - The IncrementalEncoder may use different error handling schemes by - providing the errors keyword argument. See the module docstring - for a list of possible values. - """ - self.errors = errors - self.buffer = "" - - def encode(self, input, final=False): - """ - Encodes input and returns the resulting object. - """ - raise NotImplementedError - - def reset(self): - """ - Resets the encoder to the initial state. - """ - - def getstate(self): - """ - Return the current state of the encoder. - """ - return 0 - - def setstate(self, state): - """ - Set the current state of the encoder. state must have been - returned by getstate(). - """ - -class BufferedIncrementalEncoder(IncrementalEncoder): - """ - This subclass of IncrementalEncoder can be used as the baseclass for an - incremental encoder if the encoder must keep some of the output in a - buffer between calls to encode(). - """ - def __init__(self, errors='strict'): - IncrementalEncoder.__init__(self, errors) - self.buffer = "" # unencoded input that is kept between calls to encode() - - def _buffer_encode(self, input, errors, final): - # Overwrite this method in subclasses: It must encode input - # and return an (output, length consumed) tuple - raise NotImplementedError - - def encode(self, input, final=False): - # encode input (taking the buffer into account) - data = self.buffer + input - (result, consumed) = self._buffer_encode(data, self.errors, final) - # keep unencoded input until the next call - self.buffer = data[consumed:] - return result - - def reset(self): - IncrementalEncoder.reset(self) - self.buffer = "" - - def getstate(self): - return self.buffer or 0 - - def setstate(self, state): - self.buffer = state or "" - -class IncrementalDecoder(object): - """ - An IncrementalDecoder decodes an input in multiple steps. The input can be - passed piece by piece to the decode() method. The IncrementalDecoder - remembers the state of the decoding process between calls to decode(). - """ - def __init__(self, errors='strict'): - """ - Creates a IncrementalDecoder instance. - - The IncrementalDecoder may use different error handling schemes by - providing the errors keyword argument. See the module docstring - for a list of possible values. - """ - self.errors = errors - - def decode(self, input, final=False): - """ - Decodes input and returns the resulting object. - """ - raise NotImplementedError - - def reset(self): - """ - Resets the decoder to the initial state. - """ - - def getstate(self): - """ - Return the current state of the decoder. - - This must be a (buffered_input, additional_state_info) tuple. - buffered_input must be a bytes object containing bytes that - were passed to decode() that have not yet been converted. - additional_state_info must be a non-negative integer - representing the state of the decoder WITHOUT yet having - processed the contents of buffered_input. In the initial state - and after reset(), getstate() must return (b"", 0). - """ - return (b"", 0) - - def setstate(self, state): - """ - Set the current state of the decoder. - - state must have been returned by getstate(). The effect of - setstate((b"", 0)) must be equivalent to reset(). - """ - -class BufferedIncrementalDecoder(IncrementalDecoder): - """ - This subclass of IncrementalDecoder can be used as the baseclass for an - incremental decoder if the decoder must be able to handle incomplete byte - sequences. - """ - def __init__(self, errors='strict'): - IncrementalDecoder.__init__(self, errors) - self.buffer = "" # undecoded input that is kept between calls to decode() - - def _buffer_decode(self, input, errors, final): - # Overwrite this method in subclasses: It must decode input - # and return an (output, length consumed) tuple - raise NotImplementedError - - def decode(self, input, final=False): - # decode input (taking the buffer into account) - data = self.buffer + input - (result, consumed) = self._buffer_decode(data, self.errors, final) - # keep undecoded input until the next call - self.buffer = data[consumed:] - return result - - def reset(self): - IncrementalDecoder.reset(self) - self.buffer = "" - - def getstate(self): - # additional state info is always 0 - return (self.buffer, 0) - - def setstate(self, state): - # ignore additional state info - self.buffer = state[0] - -# -# The StreamWriter and StreamReader class provide generic working -# interfaces which can be used to implement new encoding submodules -# very easily. See encodings/utf_8.py for an example on how this is -# done. -# - -class StreamWriter(Codec): - - def __init__(self, stream, errors='strict'): - - """ Creates a StreamWriter instance. - - stream must be a file-like object open for writing - (binary) data. - - The StreamWriter may use different error handling - schemes by providing the errors keyword argument. These - parameters are predefined: - - 'strict' - raise a ValueError (or a subclass) - 'ignore' - ignore the character and continue with the next - 'replace'- replace with a suitable replacement character - 'xmlcharrefreplace' - Replace with the appropriate XML - character reference. - 'backslashreplace' - Replace with backslashed escape - sequences (only for encoding). - - The set of allowed parameter values can be extended via - register_error. - """ - self.stream = stream - self.errors = errors - - def write(self, object): - - """ Writes the object's contents encoded to self.stream. - """ - data, consumed = self.encode(object, self.errors) - self.stream.write(data) - - def writelines(self, list): - - """ Writes the concatenated list of strings to the stream - using .write(). - """ - self.write(''.join(list)) - - def reset(self): - - """ Flushes and resets the codec buffers used for keeping state. - - Calling this method should ensure that the data on the - output is put into a clean state, that allows appending - of new fresh data without having to rescan the whole - stream to recover state. - - """ - pass - - def seek(self, offset, whence=0): - self.stream.seek(offset, whence) - if whence == 0 and offset == 0: - self.reset() - - def __getattr__(self, name, - getattr=getattr): - - """ Inherit all other methods from the underlying stream. - """ - return getattr(self.stream, name) - - def __enter__(self): - return self - - def __exit__(self, type, value, tb): - self.stream.close() - -### - -class StreamReader(Codec): - - def __init__(self, stream, errors='strict'): - - """ Creates a StreamReader instance. - - stream must be a file-like object open for reading - (binary) data. - - The StreamReader may use different error handling - schemes by providing the errors keyword argument. These - parameters are predefined: - - 'strict' - raise a ValueError (or a subclass) - 'ignore' - ignore the character and continue with the next - 'replace'- replace with a suitable replacement character; - - The set of allowed parameter values can be extended via - register_error. - """ - self.stream = stream - self.errors = errors - self.bytebuffer = "" - # For str->str decoding this will stay a str - # For str->unicode decoding the first read will promote it to unicode - self.charbuffer = "" - self.linebuffer = None - - def decode(self, input, errors='strict'): - raise NotImplementedError - - def read(self, size=-1, chars=-1, firstline=False): - - """ Decodes data from the stream self.stream and returns the - resulting object. - - chars indicates the number of characters to read from the - stream. read() will never return more than chars - characters, but it might return less, if there are not enough - characters available. - - size indicates the approximate maximum number of bytes to - read from the stream for decoding purposes. The decoder - can modify this setting as appropriate. The default value - -1 indicates to read and decode as much as possible. size - is intended to prevent having to decode huge files in one - step. - - If firstline is true, and a UnicodeDecodeError happens - after the first line terminator in the input only the first line - will be returned, the rest of the input will be kept until the - next call to read(). - - The method should use a greedy read strategy meaning that - it should read as much data as is allowed within the - definition of the encoding and the given size, e.g. if - optional encoding endings or state markers are available - on the stream, these should be read too. - """ - # If we have lines cached, first merge them back into characters - if self.linebuffer: - self.charbuffer = "".join(self.linebuffer) - self.linebuffer = None - - # read until we get the required number of characters (if available) - while True: - # can the request be satisfied from the character buffer? - if chars >= 0: - if len(self.charbuffer) >= chars: - break - elif size >= 0: - if len(self.charbuffer) >= size: - break - # we need more data - if size < 0: - newdata = self.stream.read() - else: - newdata = self.stream.read(size) - # decode bytes (those remaining from the last call included) - data = self.bytebuffer + newdata - try: - newchars, decodedbytes = self.decode(data, self.errors) - except UnicodeDecodeError, exc: - if firstline: - newchars, decodedbytes = self.decode(data[:exc.start], self.errors) - lines = newchars.splitlines(True) - if len(lines)<=1: - raise - else: - raise - # keep undecoded bytes until the next call - self.bytebuffer = data[decodedbytes:] - # put new characters in the character buffer - self.charbuffer += newchars - # there was no data available - if not newdata: - break - if chars < 0: - # Return everything we've got - result = self.charbuffer - self.charbuffer = "" - else: - # Return the first chars characters - result = self.charbuffer[:chars] - self.charbuffer = self.charbuffer[chars:] - return result - - def readline(self, size=None, keepends=True): - - """ Read one line from the input stream and return the - decoded data. - - size, if given, is passed as size argument to the - read() method. - - """ - # If we have lines cached from an earlier read, return - # them unconditionally - if self.linebuffer: - line = self.linebuffer[0] - del self.linebuffer[0] - if len(self.linebuffer) == 1: - # revert to charbuffer mode; we might need more data - # next time - self.charbuffer = self.linebuffer[0] - self.linebuffer = None - if not keepends: - line = line.splitlines(False)[0] - return line - - readsize = size or 72 - line = "" - # If size is given, we call read() only once - while True: - data = self.read(readsize, firstline=True) - if data: - # If we're at a "\r" read one extra character (which might - # be a "\n") to get a proper line ending. If the stream is - # temporarily exhausted we return the wrong line ending. - if data.endswith("\r"): - data += self.read(size=1, chars=1) - - line += data - lines = line.splitlines(True) - if lines: - if len(lines) > 1: - # More than one line result; the first line is a full line - # to return - line = lines[0] - del lines[0] - if len(lines) > 1: - # cache the remaining lines - lines[-1] += self.charbuffer - self.linebuffer = lines - self.charbuffer = None - else: - # only one remaining line, put it back into charbuffer - self.charbuffer = lines[0] + self.charbuffer - if not keepends: - line = line.splitlines(False)[0] - break - line0withend = lines[0] - line0withoutend = lines[0].splitlines(False)[0] - if line0withend != line0withoutend: # We really have a line end - # Put the rest back together and keep it until the next call - self.charbuffer = "".join(lines[1:]) + self.charbuffer - if keepends: - line = line0withend - else: - line = line0withoutend - break - # we didn't get anything or this was our only try - if not data or size is not None: - if line and not keepends: - line = line.splitlines(False)[0] - break - if readsize<8000: - readsize *= 2 - return line - - def readlines(self, sizehint=None, keepends=True): - - """ Read all lines available on the input stream - and return them as list of lines. - - Line breaks are implemented using the codec's decoder - method and are included in the list entries. - - sizehint, if given, is ignored since there is no efficient - way to finding the true end-of-line. - - """ - data = self.read() - return data.splitlines(keepends) - - def reset(self): - - """ Resets the codec buffers used for keeping state. - - Note that no stream repositioning should take place. - This method is primarily intended to be able to recover - from decoding errors. - - """ - self.bytebuffer = "" - self.charbuffer = u"" - self.linebuffer = None - - def seek(self, offset, whence=0): - """ Set the input stream's current position. - - Resets the codec buffers used for keeping state. - """ - self.stream.seek(offset, whence) - self.reset() - - def next(self): - - """ Return the next decoded line from the input stream.""" - line = self.readline() - if line: - return line - raise StopIteration - - def __iter__(self): - return self - - def __getattr__(self, name, - getattr=getattr): - - """ Inherit all other methods from the underlying stream. - """ - return getattr(self.stream, name) - - def __enter__(self): - return self - - def __exit__(self, type, value, tb): - self.stream.close() - -### - -class StreamReaderWriter: - - """ StreamReaderWriter instances allow wrapping streams which - work in both read and write modes. - - The design is such that one can use the factory functions - returned by the codec.lookup() function to construct the - instance. - - """ - # Optional attributes set by the file wrappers below - encoding = 'unknown' - - def __init__(self, stream, Reader, Writer, errors='strict'): - - """ Creates a StreamReaderWriter instance. - - stream must be a Stream-like object. - - Reader, Writer must be factory functions or classes - providing the StreamReader, StreamWriter interface resp. - - Error handling is done in the same way as defined for the - StreamWriter/Readers. - - """ - self.stream = stream - self.reader = Reader(stream, errors) - self.writer = Writer(stream, errors) - self.errors = errors - - def read(self, size=-1): - - return self.reader.read(size) - - def readline(self, size=None): - - return self.reader.readline(size) - - def readlines(self, sizehint=None): - - return self.reader.readlines(sizehint) - - def next(self): - - """ Return the next decoded line from the input stream.""" - return self.reader.next() - - def __iter__(self): - return self - - def write(self, data): - - return self.writer.write(data) - - def writelines(self, list): - - return self.writer.writelines(list) - - def reset(self): - - self.reader.reset() - self.writer.reset() - - def seek(self, offset, whence=0): - self.stream.seek(offset, whence) - self.reader.reset() - if whence == 0 and offset == 0: - self.writer.reset() - - def __getattr__(self, name, - getattr=getattr): - - """ Inherit all other methods from the underlying stream. - """ - return getattr(self.stream, name) - - # these are needed to make "with codecs.open(...)" work properly - - def __enter__(self): - return self - - def __exit__(self, type, value, tb): - self.stream.close() - -### - -class StreamRecoder: - - """ StreamRecoder instances provide a frontend - backend - view of encoding data. - - They use the complete set of APIs returned by the - codecs.lookup() function to implement their task. - - Data written to the stream is first decoded into an - intermediate format (which is dependent on the given codec - combination) and then written to the stream using an instance - of the provided Writer class. - - In the other direction, data is read from the stream using a - Reader instance and then return encoded data to the caller. - - """ - # Optional attributes set by the file wrappers below - data_encoding = 'unknown' - file_encoding = 'unknown' - - def __init__(self, stream, encode, decode, Reader, Writer, - errors='strict'): - - """ Creates a StreamRecoder instance which implements a two-way - conversion: encode and decode work on the frontend (the - input to .read() and output of .write()) while - Reader and Writer work on the backend (reading and - writing to the stream). - - You can use these objects to do transparent direct - recodings from e.g. latin-1 to utf-8 and back. - - stream must be a file-like object. - - encode, decode must adhere to the Codec interface, Reader, - Writer must be factory functions or classes providing the - StreamReader, StreamWriter interface resp. - - encode and decode are needed for the frontend translation, - Reader and Writer for the backend translation. Unicode is - used as intermediate encoding. - - Error handling is done in the same way as defined for the - StreamWriter/Readers. - - """ - self.stream = stream - self.encode = encode - self.decode = decode - self.reader = Reader(stream, errors) - self.writer = Writer(stream, errors) - self.errors = errors - - def read(self, size=-1): - - data = self.reader.read(size) - data, bytesencoded = self.encode(data, self.errors) - return data - - def readline(self, size=None): - - if size is None: - data = self.reader.readline() - else: - data = self.reader.readline(size) - data, bytesencoded = self.encode(data, self.errors) - return data - - def readlines(self, sizehint=None): - - data = self.reader.read() - data, bytesencoded = self.encode(data, self.errors) - return data.splitlines(1) - - def next(self): - - """ Return the next decoded line from the input stream.""" - data = self.reader.next() - data, bytesencoded = self.encode(data, self.errors) - return data - - def __iter__(self): - return self - - def write(self, data): - - data, bytesdecoded = self.decode(data, self.errors) - return self.writer.write(data) - - def writelines(self, list): - - data = ''.join(list) - data, bytesdecoded = self.decode(data, self.errors) - return self.writer.write(data) - - def reset(self): - - self.reader.reset() - self.writer.reset() - - def __getattr__(self, name, - getattr=getattr): - - """ Inherit all other methods from the underlying stream. - """ - return getattr(self.stream, name) - - def __enter__(self): - return self - - def __exit__(self, type, value, tb): - self.stream.close() - -### Shortcuts - -def open(filename, mode='rb', encoding=None, errors='strict', buffering=1): - - """ Open an encoded file using the given mode and return - a wrapped version providing transparent encoding/decoding. - - Note: The wrapped version will only accept the object format - defined by the codecs, i.e. Unicode objects for most builtin - codecs. Output is also codec dependent and will usually be - Unicode as well. - - Files are always opened in binary mode, even if no binary mode - was specified. This is done to avoid data loss due to encodings - using 8-bit values. The default file mode is 'rb' meaning to - open the file in binary read mode. - - encoding specifies the encoding which is to be used for the - file. - - errors may be given to define the error handling. It defaults - to 'strict' which causes ValueErrors to be raised in case an - encoding error occurs. - - buffering has the same meaning as for the builtin open() API. - It defaults to line buffered. - - The returned wrapped file object provides an extra attribute - .encoding which allows querying the used encoding. This - attribute is only available if an encoding was specified as - parameter. - - """ - if encoding is not None: - if 'U' in mode: - # No automatic conversion of '\n' is done on reading and writing - mode = mode.strip().replace('U', '') - if mode[:1] not in set('rwa'): - mode = 'r' + mode - if 'b' not in mode: - # Force opening of the file in binary mode - mode = mode + 'b' - file = __builtin__.open(filename, mode, buffering) - if encoding is None: - return file - info = lookup(encoding) - srw = StreamReaderWriter(file, info.streamreader, info.streamwriter, errors) - # Add attributes to simplify introspection - srw.encoding = encoding - return srw - -def EncodedFile(file, data_encoding, file_encoding=None, errors='strict'): - - """ Return a wrapped version of file which provides transparent - encoding translation. - - Strings written to the wrapped file are interpreted according - to the given data_encoding and then written to the original - file as string using file_encoding. The intermediate encoding - will usually be Unicode but depends on the specified codecs. - - Strings are read from the file using file_encoding and then - passed back to the caller as string using data_encoding. - - If file_encoding is not given, it defaults to data_encoding. - - errors may be given to define the error handling. It defaults - to 'strict' which causes ValueErrors to be raised in case an - encoding error occurs. - - The returned wrapped file object provides two extra attributes - .data_encoding and .file_encoding which reflect the given - parameters of the same name. The attributes can be used for - introspection by Python programs. - - """ - if file_encoding is None: - file_encoding = data_encoding - data_info = lookup(data_encoding) - file_info = lookup(file_encoding) - sr = StreamRecoder(file, data_info.encode, data_info.decode, - file_info.streamreader, file_info.streamwriter, errors) - # Add attributes to simplify introspection - sr.data_encoding = data_encoding - sr.file_encoding = file_encoding - return sr - -### Helpers for codec lookup - -def getencoder(encoding): - - """ Lookup up the codec for the given encoding and return - its encoder function. - - Raises a LookupError in case the encoding cannot be found. - - """ - return lookup(encoding).encode - -def getdecoder(encoding): - - """ Lookup up the codec for the given encoding and return - its decoder function. - - Raises a LookupError in case the encoding cannot be found. - - """ - return lookup(encoding).decode - -def getincrementalencoder(encoding): - - """ Lookup up the codec for the given encoding and return - its IncrementalEncoder class or factory function. - - Raises a LookupError in case the encoding cannot be found - or the codecs doesn't provide an incremental encoder. - - """ - encoder = lookup(encoding).incrementalencoder - if encoder is None: - raise LookupError(encoding) - return encoder - -def getincrementaldecoder(encoding): - - """ Lookup up the codec for the given encoding and return - its IncrementalDecoder class or factory function. - - Raises a LookupError in case the encoding cannot be found - or the codecs doesn't provide an incremental decoder. - - """ - decoder = lookup(encoding).incrementaldecoder - if decoder is None: - raise LookupError(encoding) - return decoder - -def getreader(encoding): - - """ Lookup up the codec for the given encoding and return - its StreamReader class or factory function. - - Raises a LookupError in case the encoding cannot be found. - - """ - return lookup(encoding).streamreader - -def getwriter(encoding): - - """ Lookup up the codec for the given encoding and return - its StreamWriter class or factory function. - - Raises a LookupError in case the encoding cannot be found. - - """ - return lookup(encoding).streamwriter - -def iterencode(iterator, encoding, errors='strict', **kwargs): - """ - Encoding iterator. - - Encodes the input strings from the iterator using a IncrementalEncoder. - - errors and kwargs are passed through to the IncrementalEncoder - constructor. - """ - encoder = getincrementalencoder(encoding)(errors, **kwargs) - for input in iterator: - output = encoder.encode(input) - if output: - yield output - output = encoder.encode("", True) - if output: - yield output - -def iterdecode(iterator, encoding, errors='strict', **kwargs): - """ - Decoding iterator. - - Decodes the input strings from the iterator using a IncrementalDecoder. - - errors and kwargs are passed through to the IncrementalDecoder - constructor. - """ - decoder = getincrementaldecoder(encoding)(errors, **kwargs) - for input in iterator: - output = decoder.decode(input) - if output: - yield output - output = decoder.decode("", True) - if output: - yield output - -### Helpers for charmap-based codecs - -def make_identity_dict(rng): - - """ make_identity_dict(rng) -> dict - - Return a dictionary where elements of the rng sequence are - mapped to themselves. - - """ - res = {} - for i in rng: - res[i]=i - return res - -def make_encoding_map(decoding_map): - - """ Creates an encoding map from a decoding map. - - If a target mapping in the decoding map occurs multiple - times, then that target is mapped to None (undefined mapping), - causing an exception when encountered by the charmap codec - during translation. - - One example where this happens is cp875.py which decodes - multiple character to \\u001a. - - """ - m = {} - for k,v in decoding_map.items(): - if not v in m: - m[v] = k - else: - m[v] = None - return m - -### error handlers - -try: - strict_errors = lookup_error("strict") - ignore_errors = lookup_error("ignore") - replace_errors = lookup_error("replace") - xmlcharrefreplace_errors = lookup_error("xmlcharrefreplace") - backslashreplace_errors = lookup_error("backslashreplace") -except LookupError: - # In --disable-unicode builds, these error handler are missing - strict_errors = None - ignore_errors = None - replace_errors = None - xmlcharrefreplace_errors = None - backslashreplace_errors = None - -# Tell modulefinder that using codecs probably needs the encodings -# package -_false = 0 -if _false: - import encodings - -### Tests - -if __name__ == '__main__': - - # Make stdout translate Latin-1 output into UTF-8 output - sys.stdout = EncodedFile(sys.stdout, 'latin-1', 'utf-8') - - # Have stdin translate Latin-1 input into UTF-8 input - sys.stdin = EncodedFile(sys.stdin, 'utf-8', 'latin-1')
--- a/test/lib/python2.7/copy_reg.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,201 +0,0 @@ -"""Helper to provide extensibility for pickle/cPickle. - -This is only useful to add pickle support for extension types defined in -C, not for instances of user-defined classes. -""" - -from types import ClassType as _ClassType - -__all__ = ["pickle", "constructor", - "add_extension", "remove_extension", "clear_extension_cache"] - -dispatch_table = {} - -def pickle(ob_type, pickle_function, constructor_ob=None): - if type(ob_type) is _ClassType: - raise TypeError("copy_reg is not intended for use with classes") - - if not hasattr(pickle_function, '__call__'): - raise TypeError("reduction functions must be callable") - dispatch_table[ob_type] = pickle_function - - # The constructor_ob function is a vestige of safe for unpickling. - # There is no reason for the caller to pass it anymore. - if constructor_ob is not None: - constructor(constructor_ob) - -def constructor(object): - if not hasattr(object, '__call__'): - raise TypeError("constructors must be callable") - -# Example: provide pickling support for complex numbers. - -try: - complex -except NameError: - pass -else: - - def pickle_complex(c): - return complex, (c.real, c.imag) - - pickle(complex, pickle_complex, complex) - -# Support for pickling new-style objects - -def _reconstructor(cls, base, state): - if base is object: - obj = object.__new__(cls) - else: - obj = base.__new__(cls, state) - if base.__init__ != object.__init__: - base.__init__(obj, state) - return obj - -_HEAPTYPE = 1<<9 - -# Python code for object.__reduce_ex__ for protocols 0 and 1 - -def _reduce_ex(self, proto): - assert proto < 2 - for base in self.__class__.__mro__: - if hasattr(base, '__flags__') and not base.__flags__ & _HEAPTYPE: - break - else: - base = object # not really reachable - if base is object: - state = None - else: - if base is self.__class__: - raise TypeError, "can't pickle %s objects" % base.__name__ - state = base(self) - args = (self.__class__, base, state) - try: - getstate = self.__getstate__ - except AttributeError: - if getattr(self, "__slots__", None): - raise TypeError("a class that defines __slots__ without " - "defining __getstate__ cannot be pickled") - try: - dict = self.__dict__ - except AttributeError: - dict = None - else: - dict = getstate() - if dict: - return _reconstructor, args, dict - else: - return _reconstructor, args - -# Helper for __reduce_ex__ protocol 2 - -def __newobj__(cls, *args): - return cls.__new__(cls, *args) - -def _slotnames(cls): - """Return a list of slot names for a given class. - - This needs to find slots defined by the class and its bases, so we - can't simply return the __slots__ attribute. We must walk down - the Method Resolution Order and concatenate the __slots__ of each - class found there. (This assumes classes don't modify their - __slots__ attribute to misrepresent their slots after the class is - defined.) - """ - - # Get the value from a cache in the class if possible - names = cls.__dict__.get("__slotnames__") - if names is not None: - return names - - # Not cached -- calculate the value - names = [] - if not hasattr(cls, "__slots__"): - # This class has no slots - pass - else: - # Slots found -- gather slot names from all base classes - for c in cls.__mro__: - if "__slots__" in c.__dict__: - slots = c.__dict__['__slots__'] - # if class has a single slot, it can be given as a string - if isinstance(slots, basestring): - slots = (slots,) - for name in slots: - # special descriptors - if name in ("__dict__", "__weakref__"): - continue - # mangled names - elif name.startswith('__') and not name.endswith('__'): - names.append('_%s%s' % (c.__name__, name)) - else: - names.append(name) - - # Cache the outcome in the class if at all possible - try: - cls.__slotnames__ = names - except: - pass # But don't die if we can't - - return names - -# A registry of extension codes. This is an ad-hoc compression -# mechanism. Whenever a global reference to <module>, <name> is about -# to be pickled, the (<module>, <name>) tuple is looked up here to see -# if it is a registered extension code for it. Extension codes are -# universal, so that the meaning of a pickle does not depend on -# context. (There are also some codes reserved for local use that -# don't have this restriction.) Codes are positive ints; 0 is -# reserved. - -_extension_registry = {} # key -> code -_inverted_registry = {} # code -> key -_extension_cache = {} # code -> object -# Don't ever rebind those names: cPickle grabs a reference to them when -# it's initialized, and won't see a rebinding. - -def add_extension(module, name, code): - """Register an extension code.""" - code = int(code) - if not 1 <= code <= 0x7fffffff: - raise ValueError, "code out of range" - key = (module, name) - if (_extension_registry.get(key) == code and - _inverted_registry.get(code) == key): - return # Redundant registrations are benign - if key in _extension_registry: - raise ValueError("key %s is already registered with code %s" % - (key, _extension_registry[key])) - if code in _inverted_registry: - raise ValueError("code %s is already in use for key %s" % - (code, _inverted_registry[code])) - _extension_registry[key] = code - _inverted_registry[code] = key - -def remove_extension(module, name, code): - """Unregister an extension code. For testing only.""" - key = (module, name) - if (_extension_registry.get(key) != code or - _inverted_registry.get(code) != key): - raise ValueError("key %s is not registered with code %s" % - (key, code)) - del _extension_registry[key] - del _inverted_registry[code] - if code in _extension_cache: - del _extension_cache[code] - -def clear_extension_cache(): - _extension_cache.clear() - -# Standard extension code assignments - -# Reserved ranges - -# First Last Count Purpose -# 1 127 127 Reserved for Python standard library -# 128 191 64 Reserved for Zope -# 192 239 48 Reserved for 3rd parties -# 240 255 16 Reserved for private use (will never be assigned) -# 256 Inf Inf Reserved for future assignment - -# Extension codes are assigned by the Python Software Foundation.
--- a/test/lib/python2.7/distutils/__init__.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,101 +0,0 @@ -import os -import sys -import warnings -import imp -import opcode # opcode is not a virtualenv module, so we can use it to find the stdlib - # Important! To work on pypy, this must be a module that resides in the - # lib-python/modified-x.y.z directory - -dirname = os.path.dirname - -distutils_path = os.path.join(os.path.dirname(opcode.__file__), 'distutils') -if os.path.normpath(distutils_path) == os.path.dirname(os.path.normpath(__file__)): - warnings.warn( - "The virtualenv distutils package at %s appears to be in the same location as the system distutils?") -else: - __path__.insert(0, distutils_path) - real_distutils = imp.load_module("_virtualenv_distutils", None, distutils_path, ('', '', imp.PKG_DIRECTORY)) - # Copy the relevant attributes - try: - __revision__ = real_distutils.__revision__ - except AttributeError: - pass - __version__ = real_distutils.__version__ - -from distutils import dist, sysconfig - -try: - basestring -except NameError: - basestring = str - -## patch build_ext (distutils doesn't know how to get the libs directory -## path on windows - it hardcodes the paths around the patched sys.prefix) - -if sys.platform == 'win32': - from distutils.command.build_ext import build_ext as old_build_ext - class build_ext(old_build_ext): - def finalize_options (self): - if self.library_dirs is None: - self.library_dirs = [] - elif isinstance(self.library_dirs, basestring): - self.library_dirs = self.library_dirs.split(os.pathsep) - - self.library_dirs.insert(0, os.path.join(sys.real_prefix, "Libs")) - old_build_ext.finalize_options(self) - - from distutils.command import build_ext as build_ext_module - build_ext_module.build_ext = build_ext - -## distutils.dist patches: - -old_find_config_files = dist.Distribution.find_config_files -def find_config_files(self): - found = old_find_config_files(self) - system_distutils = os.path.join(distutils_path, 'distutils.cfg') - #if os.path.exists(system_distutils): - # found.insert(0, system_distutils) - # What to call the per-user config file - if os.name == 'posix': - user_filename = ".pydistutils.cfg" - else: - user_filename = "pydistutils.cfg" - user_filename = os.path.join(sys.prefix, user_filename) - if os.path.isfile(user_filename): - for item in list(found): - if item.endswith('pydistutils.cfg'): - found.remove(item) - found.append(user_filename) - return found -dist.Distribution.find_config_files = find_config_files - -## distutils.sysconfig patches: - -old_get_python_inc = sysconfig.get_python_inc -def sysconfig_get_python_inc(plat_specific=0, prefix=None): - if prefix is None: - prefix = sys.real_prefix - return old_get_python_inc(plat_specific, prefix) -sysconfig_get_python_inc.__doc__ = old_get_python_inc.__doc__ -sysconfig.get_python_inc = sysconfig_get_python_inc - -old_get_python_lib = sysconfig.get_python_lib -def sysconfig_get_python_lib(plat_specific=0, standard_lib=0, prefix=None): - if standard_lib and prefix is None: - prefix = sys.real_prefix - return old_get_python_lib(plat_specific, standard_lib, prefix) -sysconfig_get_python_lib.__doc__ = old_get_python_lib.__doc__ -sysconfig.get_python_lib = sysconfig_get_python_lib - -old_get_config_vars = sysconfig.get_config_vars -def sysconfig_get_config_vars(*args): - real_vars = old_get_config_vars(*args) - if sys.platform == 'win32': - lib_dir = os.path.join(sys.real_prefix, "libs") - if isinstance(real_vars, dict) and 'LIBDIR' not in real_vars: - real_vars['LIBDIR'] = lib_dir # asked for all - elif isinstance(real_vars, list) and 'LIBDIR' in args: - real_vars = real_vars + [lib_dir] # asked for list - return real_vars -sysconfig_get_config_vars.__doc__ = old_get_config_vars.__doc__ -sysconfig.get_config_vars = sysconfig_get_config_vars
--- a/test/lib/python2.7/distutils/distutils.cfg Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -# This is a config file local to this virtualenv installation -# You may include options that will be used by all distutils commands, -# and by easy_install. For instance: -# -# [easy_install] -# find_links = http://mylocalsite
--- a/test/lib/python2.7/encodings/__init__.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,157 +0,0 @@ -""" Standard "encodings" Package - - Standard Python encoding modules are stored in this package - directory. - - Codec modules must have names corresponding to normalized encoding - names as defined in the normalize_encoding() function below, e.g. - 'utf-8' must be implemented by the module 'utf_8.py'. - - Each codec module must export the following interface: - - * getregentry() -> codecs.CodecInfo object - The getregentry() API must a CodecInfo object with encoder, decoder, - incrementalencoder, incrementaldecoder, streamwriter and streamreader - atttributes which adhere to the Python Codec Interface Standard. - - In addition, a module may optionally also define the following - APIs which are then used by the package's codec search function: - - * getaliases() -> sequence of encoding name strings to use as aliases - - Alias names returned by getaliases() must be normalized encoding - names as defined by normalize_encoding(). - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -"""#" - -import codecs -from encodings import aliases -import __builtin__ - -_cache = {} -_unknown = '--unknown--' -_import_tail = ['*'] -_norm_encoding_map = (' . ' - '0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ ' - ' abcdefghijklmnopqrstuvwxyz ' - ' ' - ' ' - ' ') -_aliases = aliases.aliases - -class CodecRegistryError(LookupError, SystemError): - pass - -def normalize_encoding(encoding): - - """ Normalize an encoding name. - - Normalization works as follows: all non-alphanumeric - characters except the dot used for Python package names are - collapsed and replaced with a single underscore, e.g. ' -;#' - becomes '_'. Leading and trailing underscores are removed. - - Note that encoding names should be ASCII only; if they do use - non-ASCII characters, these must be Latin-1 compatible. - - """ - # Make sure we have an 8-bit string, because .translate() works - # differently for Unicode strings. - if hasattr(__builtin__, "unicode") and isinstance(encoding, unicode): - # Note that .encode('latin-1') does *not* use the codec - # registry, so this call doesn't recurse. (See unicodeobject.c - # PyUnicode_AsEncodedString() for details) - encoding = encoding.encode('latin-1') - return '_'.join(encoding.translate(_norm_encoding_map).split()) - -def search_function(encoding): - - # Cache lookup - entry = _cache.get(encoding, _unknown) - if entry is not _unknown: - return entry - - # Import the module: - # - # First try to find an alias for the normalized encoding - # name and lookup the module using the aliased name, then try to - # lookup the module using the standard import scheme, i.e. first - # try in the encodings package, then at top-level. - # - norm_encoding = normalize_encoding(encoding) - aliased_encoding = _aliases.get(norm_encoding) or \ - _aliases.get(norm_encoding.replace('.', '_')) - if aliased_encoding is not None: - modnames = [aliased_encoding, - norm_encoding] - else: - modnames = [norm_encoding] - for modname in modnames: - if not modname or '.' in modname: - continue - try: - # Import is absolute to prevent the possibly malicious import of a - # module with side-effects that is not in the 'encodings' package. - mod = __import__('encodings.' + modname, fromlist=_import_tail, - level=0) - except ImportError: - pass - else: - break - else: - mod = None - - try: - getregentry = mod.getregentry - except AttributeError: - # Not a codec module - mod = None - - if mod is None: - # Cache misses - _cache[encoding] = None - return None - - # Now ask the module for the registry entry - entry = getregentry() - if not isinstance(entry, codecs.CodecInfo): - if not 4 <= len(entry) <= 7: - raise CodecRegistryError,\ - 'module "%s" (%s) failed to register' % \ - (mod.__name__, mod.__file__) - if not hasattr(entry[0], '__call__') or \ - not hasattr(entry[1], '__call__') or \ - (entry[2] is not None and not hasattr(entry[2], '__call__')) or \ - (entry[3] is not None and not hasattr(entry[3], '__call__')) or \ - (len(entry) > 4 and entry[4] is not None and not hasattr(entry[4], '__call__')) or \ - (len(entry) > 5 and entry[5] is not None and not hasattr(entry[5], '__call__')): - raise CodecRegistryError,\ - 'incompatible codecs in module "%s" (%s)' % \ - (mod.__name__, mod.__file__) - if len(entry)<7 or entry[6] is None: - entry += (None,)*(6-len(entry)) + (mod.__name__.split(".", 1)[1],) - entry = codecs.CodecInfo(*entry) - - # Cache the codec registry entry - _cache[encoding] = entry - - # Register its aliases (without overwriting previously registered - # aliases) - try: - codecaliases = mod.getaliases() - except AttributeError: - pass - else: - for alias in codecaliases: - if alias not in _aliases: - _aliases[alias] = modname - - # Return the registry entry - return entry - -# Register the search_function in the Python codec registry -codecs.register(search_function)
--- a/test/lib/python2.7/encodings/aliases.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,527 +0,0 @@ -""" Encoding Aliases Support - - This module is used by the encodings package search function to - map encodings names to module names. - - Note that the search function normalizes the encoding names before - doing the lookup, so the mapping will have to map normalized - encoding names to module names. - - Contents: - - The following aliases dictionary contains mappings of all IANA - character set names for which the Python core library provides - codecs. In addition to these, a few Python specific codec - aliases have also been added. - -""" -aliases = { - - # Please keep this list sorted alphabetically by value ! - - # ascii codec - '646' : 'ascii', - 'ansi_x3.4_1968' : 'ascii', - 'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name - 'ansi_x3.4_1986' : 'ascii', - 'cp367' : 'ascii', - 'csascii' : 'ascii', - 'ibm367' : 'ascii', - 'iso646_us' : 'ascii', - 'iso_646.irv_1991' : 'ascii', - 'iso_ir_6' : 'ascii', - 'us' : 'ascii', - 'us_ascii' : 'ascii', - - # base64_codec codec - 'base64' : 'base64_codec', - 'base_64' : 'base64_codec', - - # big5 codec - 'big5_tw' : 'big5', - 'csbig5' : 'big5', - - # big5hkscs codec - 'big5_hkscs' : 'big5hkscs', - 'hkscs' : 'big5hkscs', - - # bz2_codec codec - 'bz2' : 'bz2_codec', - - # cp037 codec - '037' : 'cp037', - 'csibm037' : 'cp037', - 'ebcdic_cp_ca' : 'cp037', - 'ebcdic_cp_nl' : 'cp037', - 'ebcdic_cp_us' : 'cp037', - 'ebcdic_cp_wt' : 'cp037', - 'ibm037' : 'cp037', - 'ibm039' : 'cp037', - - # cp1026 codec - '1026' : 'cp1026', - 'csibm1026' : 'cp1026', - 'ibm1026' : 'cp1026', - - # cp1140 codec - '1140' : 'cp1140', - 'ibm1140' : 'cp1140', - - # cp1250 codec - '1250' : 'cp1250', - 'windows_1250' : 'cp1250', - - # cp1251 codec - '1251' : 'cp1251', - 'windows_1251' : 'cp1251', - - # cp1252 codec - '1252' : 'cp1252', - 'windows_1252' : 'cp1252', - - # cp1253 codec - '1253' : 'cp1253', - 'windows_1253' : 'cp1253', - - # cp1254 codec - '1254' : 'cp1254', - 'windows_1254' : 'cp1254', - - # cp1255 codec - '1255' : 'cp1255', - 'windows_1255' : 'cp1255', - - # cp1256 codec - '1256' : 'cp1256', - 'windows_1256' : 'cp1256', - - # cp1257 codec - '1257' : 'cp1257', - 'windows_1257' : 'cp1257', - - # cp1258 codec - '1258' : 'cp1258', - 'windows_1258' : 'cp1258', - - # cp424 codec - '424' : 'cp424', - 'csibm424' : 'cp424', - 'ebcdic_cp_he' : 'cp424', - 'ibm424' : 'cp424', - - # cp437 codec - '437' : 'cp437', - 'cspc8codepage437' : 'cp437', - 'ibm437' : 'cp437', - - # cp500 codec - '500' : 'cp500', - 'csibm500' : 'cp500', - 'ebcdic_cp_be' : 'cp500', - 'ebcdic_cp_ch' : 'cp500', - 'ibm500' : 'cp500', - - # cp775 codec - '775' : 'cp775', - 'cspc775baltic' : 'cp775', - 'ibm775' : 'cp775', - - # cp850 codec - '850' : 'cp850', - 'cspc850multilingual' : 'cp850', - 'ibm850' : 'cp850', - - # cp852 codec - '852' : 'cp852', - 'cspcp852' : 'cp852', - 'ibm852' : 'cp852', - - # cp855 codec - '855' : 'cp855', - 'csibm855' : 'cp855', - 'ibm855' : 'cp855', - - # cp857 codec - '857' : 'cp857', - 'csibm857' : 'cp857', - 'ibm857' : 'cp857', - - # cp858 codec - '858' : 'cp858', - 'csibm858' : 'cp858', - 'ibm858' : 'cp858', - - # cp860 codec - '860' : 'cp860', - 'csibm860' : 'cp860', - 'ibm860' : 'cp860', - - # cp861 codec - '861' : 'cp861', - 'cp_is' : 'cp861', - 'csibm861' : 'cp861', - 'ibm861' : 'cp861', - - # cp862 codec - '862' : 'cp862', - 'cspc862latinhebrew' : 'cp862', - 'ibm862' : 'cp862', - - # cp863 codec - '863' : 'cp863', - 'csibm863' : 'cp863', - 'ibm863' : 'cp863', - - # cp864 codec - '864' : 'cp864', - 'csibm864' : 'cp864', - 'ibm864' : 'cp864', - - # cp865 codec - '865' : 'cp865', - 'csibm865' : 'cp865', - 'ibm865' : 'cp865', - - # cp866 codec - '866' : 'cp866', - 'csibm866' : 'cp866', - 'ibm866' : 'cp866', - - # cp869 codec - '869' : 'cp869', - 'cp_gr' : 'cp869', - 'csibm869' : 'cp869', - 'ibm869' : 'cp869', - - # cp932 codec - '932' : 'cp932', - 'ms932' : 'cp932', - 'mskanji' : 'cp932', - 'ms_kanji' : 'cp932', - - # cp949 codec - '949' : 'cp949', - 'ms949' : 'cp949', - 'uhc' : 'cp949', - - # cp950 codec - '950' : 'cp950', - 'ms950' : 'cp950', - - # euc_jis_2004 codec - 'jisx0213' : 'euc_jis_2004', - 'eucjis2004' : 'euc_jis_2004', - 'euc_jis2004' : 'euc_jis_2004', - - # euc_jisx0213 codec - 'eucjisx0213' : 'euc_jisx0213', - - # euc_jp codec - 'eucjp' : 'euc_jp', - 'ujis' : 'euc_jp', - 'u_jis' : 'euc_jp', - - # euc_kr codec - 'euckr' : 'euc_kr', - 'korean' : 'euc_kr', - 'ksc5601' : 'euc_kr', - 'ks_c_5601' : 'euc_kr', - 'ks_c_5601_1987' : 'euc_kr', - 'ksx1001' : 'euc_kr', - 'ks_x_1001' : 'euc_kr', - - # gb18030 codec - 'gb18030_2000' : 'gb18030', - - # gb2312 codec - 'chinese' : 'gb2312', - 'csiso58gb231280' : 'gb2312', - 'euc_cn' : 'gb2312', - 'euccn' : 'gb2312', - 'eucgb2312_cn' : 'gb2312', - 'gb2312_1980' : 'gb2312', - 'gb2312_80' : 'gb2312', - 'iso_ir_58' : 'gb2312', - - # gbk codec - '936' : 'gbk', - 'cp936' : 'gbk', - 'ms936' : 'gbk', - - # hex_codec codec - 'hex' : 'hex_codec', - - # hp_roman8 codec - 'roman8' : 'hp_roman8', - 'r8' : 'hp_roman8', - 'csHPRoman8' : 'hp_roman8', - - # hz codec - 'hzgb' : 'hz', - 'hz_gb' : 'hz', - 'hz_gb_2312' : 'hz', - - # iso2022_jp codec - 'csiso2022jp' : 'iso2022_jp', - 'iso2022jp' : 'iso2022_jp', - 'iso_2022_jp' : 'iso2022_jp', - - # iso2022_jp_1 codec - 'iso2022jp_1' : 'iso2022_jp_1', - 'iso_2022_jp_1' : 'iso2022_jp_1', - - # iso2022_jp_2 codec - 'iso2022jp_2' : 'iso2022_jp_2', - 'iso_2022_jp_2' : 'iso2022_jp_2', - - # iso2022_jp_2004 codec - 'iso_2022_jp_2004' : 'iso2022_jp_2004', - 'iso2022jp_2004' : 'iso2022_jp_2004', - - # iso2022_jp_3 codec - 'iso2022jp_3' : 'iso2022_jp_3', - 'iso_2022_jp_3' : 'iso2022_jp_3', - - # iso2022_jp_ext codec - 'iso2022jp_ext' : 'iso2022_jp_ext', - 'iso_2022_jp_ext' : 'iso2022_jp_ext', - - # iso2022_kr codec - 'csiso2022kr' : 'iso2022_kr', - 'iso2022kr' : 'iso2022_kr', - 'iso_2022_kr' : 'iso2022_kr', - - # iso8859_10 codec - 'csisolatin6' : 'iso8859_10', - 'iso_8859_10' : 'iso8859_10', - 'iso_8859_10_1992' : 'iso8859_10', - 'iso_ir_157' : 'iso8859_10', - 'l6' : 'iso8859_10', - 'latin6' : 'iso8859_10', - - # iso8859_11 codec - 'thai' : 'iso8859_11', - 'iso_8859_11' : 'iso8859_11', - 'iso_8859_11_2001' : 'iso8859_11', - - # iso8859_13 codec - 'iso_8859_13' : 'iso8859_13', - 'l7' : 'iso8859_13', - 'latin7' : 'iso8859_13', - - # iso8859_14 codec - 'iso_8859_14' : 'iso8859_14', - 'iso_8859_14_1998' : 'iso8859_14', - 'iso_celtic' : 'iso8859_14', - 'iso_ir_199' : 'iso8859_14', - 'l8' : 'iso8859_14', - 'latin8' : 'iso8859_14', - - # iso8859_15 codec - 'iso_8859_15' : 'iso8859_15', - 'l9' : 'iso8859_15', - 'latin9' : 'iso8859_15', - - # iso8859_16 codec - 'iso_8859_16' : 'iso8859_16', - 'iso_8859_16_2001' : 'iso8859_16', - 'iso_ir_226' : 'iso8859_16', - 'l10' : 'iso8859_16', - 'latin10' : 'iso8859_16', - - # iso8859_2 codec - 'csisolatin2' : 'iso8859_2', - 'iso_8859_2' : 'iso8859_2', - 'iso_8859_2_1987' : 'iso8859_2', - 'iso_ir_101' : 'iso8859_2', - 'l2' : 'iso8859_2', - 'latin2' : 'iso8859_2', - - # iso8859_3 codec - 'csisolatin3' : 'iso8859_3', - 'iso_8859_3' : 'iso8859_3', - 'iso_8859_3_1988' : 'iso8859_3', - 'iso_ir_109' : 'iso8859_3', - 'l3' : 'iso8859_3', - 'latin3' : 'iso8859_3', - - # iso8859_4 codec - 'csisolatin4' : 'iso8859_4', - 'iso_8859_4' : 'iso8859_4', - 'iso_8859_4_1988' : 'iso8859_4', - 'iso_ir_110' : 'iso8859_4', - 'l4' : 'iso8859_4', - 'latin4' : 'iso8859_4', - - # iso8859_5 codec - 'csisolatincyrillic' : 'iso8859_5', - 'cyrillic' : 'iso8859_5', - 'iso_8859_5' : 'iso8859_5', - 'iso_8859_5_1988' : 'iso8859_5', - 'iso_ir_144' : 'iso8859_5', - - # iso8859_6 codec - 'arabic' : 'iso8859_6', - 'asmo_708' : 'iso8859_6', - 'csisolatinarabic' : 'iso8859_6', - 'ecma_114' : 'iso8859_6', - 'iso_8859_6' : 'iso8859_6', - 'iso_8859_6_1987' : 'iso8859_6', - 'iso_ir_127' : 'iso8859_6', - - # iso8859_7 codec - 'csisolatingreek' : 'iso8859_7', - 'ecma_118' : 'iso8859_7', - 'elot_928' : 'iso8859_7', - 'greek' : 'iso8859_7', - 'greek8' : 'iso8859_7', - 'iso_8859_7' : 'iso8859_7', - 'iso_8859_7_1987' : 'iso8859_7', - 'iso_ir_126' : 'iso8859_7', - - # iso8859_8 codec - 'csisolatinhebrew' : 'iso8859_8', - 'hebrew' : 'iso8859_8', - 'iso_8859_8' : 'iso8859_8', - 'iso_8859_8_1988' : 'iso8859_8', - 'iso_ir_138' : 'iso8859_8', - - # iso8859_9 codec - 'csisolatin5' : 'iso8859_9', - 'iso_8859_9' : 'iso8859_9', - 'iso_8859_9_1989' : 'iso8859_9', - 'iso_ir_148' : 'iso8859_9', - 'l5' : 'iso8859_9', - 'latin5' : 'iso8859_9', - - # johab codec - 'cp1361' : 'johab', - 'ms1361' : 'johab', - - # koi8_r codec - 'cskoi8r' : 'koi8_r', - - # latin_1 codec - # - # Note that the latin_1 codec is implemented internally in C and a - # lot faster than the charmap codec iso8859_1 which uses the same - # encoding. This is why we discourage the use of the iso8859_1 - # codec and alias it to latin_1 instead. - # - '8859' : 'latin_1', - 'cp819' : 'latin_1', - 'csisolatin1' : 'latin_1', - 'ibm819' : 'latin_1', - 'iso8859' : 'latin_1', - 'iso8859_1' : 'latin_1', - 'iso_8859_1' : 'latin_1', - 'iso_8859_1_1987' : 'latin_1', - 'iso_ir_100' : 'latin_1', - 'l1' : 'latin_1', - 'latin' : 'latin_1', - 'latin1' : 'latin_1', - - # mac_cyrillic codec - 'maccyrillic' : 'mac_cyrillic', - - # mac_greek codec - 'macgreek' : 'mac_greek', - - # mac_iceland codec - 'maciceland' : 'mac_iceland', - - # mac_latin2 codec - 'maccentraleurope' : 'mac_latin2', - 'maclatin2' : 'mac_latin2', - - # mac_roman codec - 'macroman' : 'mac_roman', - - # mac_turkish codec - 'macturkish' : 'mac_turkish', - - # mbcs codec - 'dbcs' : 'mbcs', - - # ptcp154 codec - 'csptcp154' : 'ptcp154', - 'pt154' : 'ptcp154', - 'cp154' : 'ptcp154', - 'cyrillic_asian' : 'ptcp154', - - # quopri_codec codec - 'quopri' : 'quopri_codec', - 'quoted_printable' : 'quopri_codec', - 'quotedprintable' : 'quopri_codec', - - # rot_13 codec - 'rot13' : 'rot_13', - - # shift_jis codec - 'csshiftjis' : 'shift_jis', - 'shiftjis' : 'shift_jis', - 'sjis' : 'shift_jis', - 's_jis' : 'shift_jis', - - # shift_jis_2004 codec - 'shiftjis2004' : 'shift_jis_2004', - 'sjis_2004' : 'shift_jis_2004', - 's_jis_2004' : 'shift_jis_2004', - - # shift_jisx0213 codec - 'shiftjisx0213' : 'shift_jisx0213', - 'sjisx0213' : 'shift_jisx0213', - 's_jisx0213' : 'shift_jisx0213', - - # tactis codec - 'tis260' : 'tactis', - - # tis_620 codec - 'tis620' : 'tis_620', - 'tis_620_0' : 'tis_620', - 'tis_620_2529_0' : 'tis_620', - 'tis_620_2529_1' : 'tis_620', - 'iso_ir_166' : 'tis_620', - - # utf_16 codec - 'u16' : 'utf_16', - 'utf16' : 'utf_16', - - # utf_16_be codec - 'unicodebigunmarked' : 'utf_16_be', - 'utf_16be' : 'utf_16_be', - - # utf_16_le codec - 'unicodelittleunmarked' : 'utf_16_le', - 'utf_16le' : 'utf_16_le', - - # utf_32 codec - 'u32' : 'utf_32', - 'utf32' : 'utf_32', - - # utf_32_be codec - 'utf_32be' : 'utf_32_be', - - # utf_32_le codec - 'utf_32le' : 'utf_32_le', - - # utf_7 codec - 'u7' : 'utf_7', - 'utf7' : 'utf_7', - 'unicode_1_1_utf_7' : 'utf_7', - - # utf_8 codec - 'u8' : 'utf_8', - 'utf' : 'utf_8', - 'utf8' : 'utf_8', - 'utf8_ucs2' : 'utf_8', - 'utf8_ucs4' : 'utf_8', - - # uu_codec codec - 'uu' : 'uu_codec', - - # zlib_codec codec - 'zip' : 'zlib_codec', - 'zlib' : 'zlib_codec', - -}
--- a/test/lib/python2.7/encodings/ascii.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -""" Python 'ascii' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. - encode = codecs.ascii_encode - decode = codecs.ascii_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.ascii_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.ascii_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -class StreamConverter(StreamWriter,StreamReader): - - encode = codecs.ascii_decode - decode = codecs.ascii_encode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='ascii', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - )
--- a/test/lib/python2.7/encodings/base64_codec.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -""" Python 'base64_codec' Codec - base64 content transfer encoding - - Unlike most of the other codecs which target Unicode, this codec - will return Python string objects for both encode and decode. - - Written by Marc-Andre Lemburg (mal@lemburg.com). - -""" -import codecs, base64 - -### Codec APIs - -def base64_encode(input,errors='strict'): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = base64.encodestring(input) - return (output, len(input)) - -def base64_decode(input,errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = base64.decodestring(input) - return (output, len(input)) - -class Codec(codecs.Codec): - - def encode(self, input,errors='strict'): - return base64_encode(input,errors) - def decode(self, input,errors='strict'): - return base64_decode(input,errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - assert self.errors == 'strict' - return base64.encodestring(input) - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - assert self.errors == 'strict' - return base64.decodestring(input) - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='base64', - encode=base64_encode, - decode=base64_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - _is_text_encoding=False, - )
--- a/test/lib/python2.7/encodings/big5.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# big5.py: Python Unicode Codec for BIG5 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_tw, codecs -import _multibytecodec as mbc - -codec = _codecs_tw.getcodec('big5') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='big5', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/big5hkscs.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# big5hkscs.py: Python Unicode Codec for BIG5HKSCS -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_hk, codecs -import _multibytecodec as mbc - -codec = _codecs_hk.getcodec('big5hkscs') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='big5hkscs', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/bz2_codec.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -""" Python 'bz2_codec' Codec - bz2 compression encoding - - Unlike most of the other codecs which target Unicode, this codec - will return Python string objects for both encode and decode. - - Adapted by Raymond Hettinger from zlib_codec.py which was written - by Marc-Andre Lemburg (mal@lemburg.com). - -""" -import codecs -import bz2 # this codec needs the optional bz2 module ! - -### Codec APIs - -def bz2_encode(input,errors='strict'): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = bz2.compress(input) - return (output, len(input)) - -def bz2_decode(input,errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = bz2.decompress(input) - return (output, len(input)) - -class Codec(codecs.Codec): - - def encode(self, input, errors='strict'): - return bz2_encode(input, errors) - def decode(self, input, errors='strict'): - return bz2_decode(input, errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict'): - assert errors == 'strict' - self.errors = errors - self.compressobj = bz2.BZ2Compressor() - - def encode(self, input, final=False): - if final: - c = self.compressobj.compress(input) - return c + self.compressobj.flush() - else: - return self.compressobj.compress(input) - - def reset(self): - self.compressobj = bz2.BZ2Compressor() - -class IncrementalDecoder(codecs.IncrementalDecoder): - def __init__(self, errors='strict'): - assert errors == 'strict' - self.errors = errors - self.decompressobj = bz2.BZ2Decompressor() - - def decode(self, input, final=False): - try: - return self.decompressobj.decompress(input) - except EOFError: - return '' - - def reset(self): - self.decompressobj = bz2.BZ2Decompressor() - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name="bz2", - encode=bz2_encode, - decode=bz2_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - _is_text_encoding=False, - )
--- a/test/lib/python2.7/encodings/charmap.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,69 +0,0 @@ -""" Generic Python Character Mapping Codec. - - Use this codec directly rather than through the automatic - conversion mechanisms supplied by unicode() and .encode(). - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. - encode = codecs.charmap_encode - decode = codecs.charmap_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict', mapping=None): - codecs.IncrementalEncoder.__init__(self, errors) - self.mapping = mapping - - def encode(self, input, final=False): - return codecs.charmap_encode(input, self.errors, self.mapping)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def __init__(self, errors='strict', mapping=None): - codecs.IncrementalDecoder.__init__(self, errors) - self.mapping = mapping - - def decode(self, input, final=False): - return codecs.charmap_decode(input, self.errors, self.mapping)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - - def __init__(self,stream,errors='strict',mapping=None): - codecs.StreamWriter.__init__(self,stream,errors) - self.mapping = mapping - - def encode(self,input,errors='strict'): - return Codec.encode(input,errors,self.mapping) - -class StreamReader(Codec,codecs.StreamReader): - - def __init__(self,stream,errors='strict',mapping=None): - codecs.StreamReader.__init__(self,stream,errors) - self.mapping = mapping - - def decode(self,input,errors='strict'): - return Codec.decode(input,errors,self.mapping) - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='charmap', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - )
--- a/test/lib/python2.7/encodings/cp037.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp037 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp037', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xa2' # 0x4A -> CENT SIGN - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'|' # 0x4F -> VERTICAL LINE - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'!' # 0x5A -> EXCLAMATION MARK - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'\xac' # 0x5F -> NOT SIGN - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' # 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9D -> CEDILLA - u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xAF -> REGISTERED SIGN - u'^' # 0xB0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xBA -> LEFT SQUARE BRACKET - u']' # 0xBB -> RIGHT SQUARE BRACKET - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1006.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1006 generated from 'MAPPINGS/VENDORS/MISC/CP1006.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1006', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u06f0' # 0xA1 -> EXTENDED ARABIC-INDIC DIGIT ZERO - u'\u06f1' # 0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE - u'\u06f2' # 0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO - u'\u06f3' # 0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE - u'\u06f4' # 0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR - u'\u06f5' # 0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE - u'\u06f6' # 0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX - u'\u06f7' # 0xA8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN - u'\u06f8' # 0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT - u'\u06f9' # 0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE - u'\u060c' # 0xAB -> ARABIC COMMA - u'\u061b' # 0xAC -> ARABIC SEMICOLON - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u061f' # 0xAE -> ARABIC QUESTION MARK - u'\ufe81' # 0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - u'\ufe8d' # 0xB0 -> ARABIC LETTER ALEF ISOLATED FORM - u'\ufe8e' # 0xB1 -> ARABIC LETTER ALEF FINAL FORM - u'\ufe8e' # 0xB2 -> ARABIC LETTER ALEF FINAL FORM - u'\ufe8f' # 0xB3 -> ARABIC LETTER BEH ISOLATED FORM - u'\ufe91' # 0xB4 -> ARABIC LETTER BEH INITIAL FORM - u'\ufb56' # 0xB5 -> ARABIC LETTER PEH ISOLATED FORM - u'\ufb58' # 0xB6 -> ARABIC LETTER PEH INITIAL FORM - u'\ufe93' # 0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM - u'\ufe95' # 0xB8 -> ARABIC LETTER TEH ISOLATED FORM - u'\ufe97' # 0xB9 -> ARABIC LETTER TEH INITIAL FORM - u'\ufb66' # 0xBA -> ARABIC LETTER TTEH ISOLATED FORM - u'\ufb68' # 0xBB -> ARABIC LETTER TTEH INITIAL FORM - u'\ufe99' # 0xBC -> ARABIC LETTER THEH ISOLATED FORM - u'\ufe9b' # 0xBD -> ARABIC LETTER THEH INITIAL FORM - u'\ufe9d' # 0xBE -> ARABIC LETTER JEEM ISOLATED FORM - u'\ufe9f' # 0xBF -> ARABIC LETTER JEEM INITIAL FORM - u'\ufb7a' # 0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM - u'\ufb7c' # 0xC1 -> ARABIC LETTER TCHEH INITIAL FORM - u'\ufea1' # 0xC2 -> ARABIC LETTER HAH ISOLATED FORM - u'\ufea3' # 0xC3 -> ARABIC LETTER HAH INITIAL FORM - u'\ufea5' # 0xC4 -> ARABIC LETTER KHAH ISOLATED FORM - u'\ufea7' # 0xC5 -> ARABIC LETTER KHAH INITIAL FORM - u'\ufea9' # 0xC6 -> ARABIC LETTER DAL ISOLATED FORM - u'\ufb84' # 0xC7 -> ARABIC LETTER DAHAL ISOLATED FORMN - u'\ufeab' # 0xC8 -> ARABIC LETTER THAL ISOLATED FORM - u'\ufead' # 0xC9 -> ARABIC LETTER REH ISOLATED FORM - u'\ufb8c' # 0xCA -> ARABIC LETTER RREH ISOLATED FORM - u'\ufeaf' # 0xCB -> ARABIC LETTER ZAIN ISOLATED FORM - u'\ufb8a' # 0xCC -> ARABIC LETTER JEH ISOLATED FORM - u'\ufeb1' # 0xCD -> ARABIC LETTER SEEN ISOLATED FORM - u'\ufeb3' # 0xCE -> ARABIC LETTER SEEN INITIAL FORM - u'\ufeb5' # 0xCF -> ARABIC LETTER SHEEN ISOLATED FORM - u'\ufeb7' # 0xD0 -> ARABIC LETTER SHEEN INITIAL FORM - u'\ufeb9' # 0xD1 -> ARABIC LETTER SAD ISOLATED FORM - u'\ufebb' # 0xD2 -> ARABIC LETTER SAD INITIAL FORM - u'\ufebd' # 0xD3 -> ARABIC LETTER DAD ISOLATED FORM - u'\ufebf' # 0xD4 -> ARABIC LETTER DAD INITIAL FORM - u'\ufec1' # 0xD5 -> ARABIC LETTER TAH ISOLATED FORM - u'\ufec5' # 0xD6 -> ARABIC LETTER ZAH ISOLATED FORM - u'\ufec9' # 0xD7 -> ARABIC LETTER AIN ISOLATED FORM - u'\ufeca' # 0xD8 -> ARABIC LETTER AIN FINAL FORM - u'\ufecb' # 0xD9 -> ARABIC LETTER AIN INITIAL FORM - u'\ufecc' # 0xDA -> ARABIC LETTER AIN MEDIAL FORM - u'\ufecd' # 0xDB -> ARABIC LETTER GHAIN ISOLATED FORM - u'\ufece' # 0xDC -> ARABIC LETTER GHAIN FINAL FORM - u'\ufecf' # 0xDD -> ARABIC LETTER GHAIN INITIAL FORM - u'\ufed0' # 0xDE -> ARABIC LETTER GHAIN MEDIAL FORM - u'\ufed1' # 0xDF -> ARABIC LETTER FEH ISOLATED FORM - u'\ufed3' # 0xE0 -> ARABIC LETTER FEH INITIAL FORM - u'\ufed5' # 0xE1 -> ARABIC LETTER QAF ISOLATED FORM - u'\ufed7' # 0xE2 -> ARABIC LETTER QAF INITIAL FORM - u'\ufed9' # 0xE3 -> ARABIC LETTER KAF ISOLATED FORM - u'\ufedb' # 0xE4 -> ARABIC LETTER KAF INITIAL FORM - u'\ufb92' # 0xE5 -> ARABIC LETTER GAF ISOLATED FORM - u'\ufb94' # 0xE6 -> ARABIC LETTER GAF INITIAL FORM - u'\ufedd' # 0xE7 -> ARABIC LETTER LAM ISOLATED FORM - u'\ufedf' # 0xE8 -> ARABIC LETTER LAM INITIAL FORM - u'\ufee0' # 0xE9 -> ARABIC LETTER LAM MEDIAL FORM - u'\ufee1' # 0xEA -> ARABIC LETTER MEEM ISOLATED FORM - u'\ufee3' # 0xEB -> ARABIC LETTER MEEM INITIAL FORM - u'\ufb9e' # 0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM - u'\ufee5' # 0xED -> ARABIC LETTER NOON ISOLATED FORM - u'\ufee7' # 0xEE -> ARABIC LETTER NOON INITIAL FORM - u'\ufe85' # 0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - u'\ufeed' # 0xF0 -> ARABIC LETTER WAW ISOLATED FORM - u'\ufba6' # 0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM - u'\ufba8' # 0xF2 -> ARABIC LETTER HEH GOAL INITIAL FORM - u'\ufba9' # 0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM - u'\ufbaa' # 0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM - u'\ufe80' # 0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM - u'\ufe89' # 0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM - u'\ufe8a' # 0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM - u'\ufe8b' # 0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - u'\ufef1' # 0xF9 -> ARABIC LETTER YEH ISOLATED FORM - u'\ufef2' # 0xFA -> ARABIC LETTER YEH FINAL FORM - u'\ufef3' # 0xFB -> ARABIC LETTER YEH INITIAL FORM - u'\ufbb0' # 0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM - u'\ufbae' # 0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM - u'\ufe7c' # 0xFE -> ARABIC SHADDA ISOLATED FORM - u'\ufe7d' # 0xFF -> ARABIC SHADDA MEDIAL FORM -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1026.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1026 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1026', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'{' # 0x48 -> LEFT CURLY BRACKET - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xc7' # 0x4A -> LATIN CAPITAL LETTER C WITH CEDILLA - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'!' # 0x4F -> EXCLAMATION MARK - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'\u011e' # 0x5A -> LATIN CAPITAL LETTER G WITH BREVE - u'\u0130' # 0x5B -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'^' # 0x5F -> CIRCUMFLEX ACCENT - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'[' # 0x68 -> LEFT SQUARE BRACKET - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\u015f' # 0x6A -> LATIN SMALL LETTER S WITH CEDILLA - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' # 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'\u0131' # 0x79 -> LATIN SMALL LETTER DOTLESS I - u':' # 0x7A -> COLON - u'\xd6' # 0x7B -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u015e' # 0x7C -> LATIN CAPITAL LETTER S WITH CEDILLA - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'\xdc' # 0x7F -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'}' # 0x8C -> RIGHT CURLY BRACKET - u'`' # 0x8D -> GRAVE ACCENT - u'\xa6' # 0x8E -> BROKEN BAR - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9D -> CEDILLA - u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'\xf6' # 0xA1 -> LATIN SMALL LETTER O WITH DIAERESIS - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u']' # 0xAC -> RIGHT SQUARE BRACKET - u'$' # 0xAD -> DOLLAR SIGN - u'@' # 0xAE -> COMMERCIAL AT - u'\xae' # 0xAF -> REGISTERED SIGN - u'\xa2' # 0xB0 -> CENT SIGN - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'\xac' # 0xBA -> NOT SIGN - u'|' # 0xBB -> VERTICAL LINE - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'\xe7' # 0xC0 -> LATIN SMALL LETTER C WITH CEDILLA - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'~' # 0xCC -> TILDE - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE - u'\u011f' # 0xD0 -> LATIN SMALL LETTER G WITH BREVE - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\\' # 0xDC -> REVERSE SOLIDUS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xfc' # 0xE0 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'#' # 0xEC -> NUMBER SIGN - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'"' # 0xFC -> QUOTATION MARK - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1140.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1140 generated from 'python-mappings/CP1140.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1140', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'\xa2' # 0x4A -> CENT SIGN - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'|' # 0x4F -> VERTICAL LINE - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'!' # 0x5A -> EXCLAMATION MARK - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'\xac' # 0x5F -> NOT SIGN - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' # 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9D -> CEDILLA - u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\u20ac' # 0x9F -> EURO SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xAF -> REGISTERED SIGN - u'^' # 0xB0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xBA -> LEFT SQUARE BRACKET - u']' # 0xBB -> RIGHT SQUARE BRACKET - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1250.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1250 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1250', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\ufffe' # 0x83 -> UNDEFINED - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\ufffe' # 0x88 -> UNDEFINED - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u015a' # 0x8C -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u0164' # 0x8D -> LATIN CAPITAL LETTER T WITH CARON - u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON - u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u015b' # 0x9C -> LATIN SMALL LETTER S WITH ACUTE - u'\u0165' # 0x9D -> LATIN SMALL LETTER T WITH CARON - u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON - u'\u017a' # 0x9F -> LATIN SMALL LETTER Z WITH ACUTE - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u02c7' # 0xA1 -> CARON - u'\u02d8' # 0xA2 -> BREVE - u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u0104' # 0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u02db' # 0xB2 -> OGONEK - u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\u0105' # 0xB9 -> LATIN SMALL LETTER A WITH OGONEK - u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u013d' # 0xBC -> LATIN CAPITAL LETTER L WITH CARON - u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT - u'\u013e' # 0xBE -> LATIN SMALL LETTER L WITH CARON - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON - u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE - u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON - u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA - u'\u02d9' # 0xFF -> DOT ABOVE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1251.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1251 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1251', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u0402' # 0x80 -> CYRILLIC CAPITAL LETTER DJE - u'\u0403' # 0x81 -> CYRILLIC CAPITAL LETTER GJE - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0453' # 0x83 -> CYRILLIC SMALL LETTER GJE - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u20ac' # 0x88 -> EURO SIGN - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0409' # 0x8A -> CYRILLIC CAPITAL LETTER LJE - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u040a' # 0x8C -> CYRILLIC CAPITAL LETTER NJE - u'\u040c' # 0x8D -> CYRILLIC CAPITAL LETTER KJE - u'\u040b' # 0x8E -> CYRILLIC CAPITAL LETTER TSHE - u'\u040f' # 0x8F -> CYRILLIC CAPITAL LETTER DZHE - u'\u0452' # 0x90 -> CYRILLIC SMALL LETTER DJE - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0459' # 0x9A -> CYRILLIC SMALL LETTER LJE - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u045a' # 0x9C -> CYRILLIC SMALL LETTER NJE - u'\u045c' # 0x9D -> CYRILLIC SMALL LETTER KJE - u'\u045b' # 0x9E -> CYRILLIC SMALL LETTER TSHE - u'\u045f' # 0x9F -> CYRILLIC SMALL LETTER DZHE - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u040e' # 0xA1 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045e' # 0xA2 -> CYRILLIC SMALL LETTER SHORT U - u'\u0408' # 0xA3 -> CYRILLIC CAPITAL LETTER JE - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u0490' # 0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u0401' # 0xA8 -> CYRILLIC CAPITAL LETTER IO - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0404' # 0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u0407' # 0xAF -> CYRILLIC CAPITAL LETTER YI - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u0406' # 0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0456' # 0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0491' # 0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u0451' # 0xB8 -> CYRILLIC SMALL LETTER IO - u'\u2116' # 0xB9 -> NUMERO SIGN - u'\u0454' # 0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0458' # 0xBC -> CYRILLIC SMALL LETTER JE - u'\u0405' # 0xBD -> CYRILLIC CAPITAL LETTER DZE - u'\u0455' # 0xBE -> CYRILLIC SMALL LETTER DZE - u'\u0457' # 0xBF -> CYRILLIC SMALL LETTER YI - u'\u0410' # 0xC0 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xC1 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0xC2 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0xC3 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0xC4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xC5 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0xC6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0xC7 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0xC8 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xC9 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xCA -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xCB -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xCC -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xCD -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xCE -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xCF -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0xD0 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xD1 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xD2 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xD3 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0xD4 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0xD5 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0xD6 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0xD7 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0xD8 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0xD9 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0xDB -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0xDD -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0xDE -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0xDF -> CYRILLIC CAPITAL LETTER YA - u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE - u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU - u'\u044f' # 0xFF -> CYRILLIC SMALL LETTER YA -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1252.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1252 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1252', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8D -> UNDEFINED - u'\u017d' # 0x8E -> LATIN CAPITAL LETTER Z WITH CARON - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9D -> UNDEFINED - u'\u017e' # 0x9E -> LATIN SMALL LETTER Z WITH CARON - u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1253.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1253 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1253', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\ufffe' # 0x88 -> UNDEFINED - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9C -> UNDEFINED - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0385' # 0xA1 -> GREEK DIALYTIKA TONOS - u'\u0386' # 0xA2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\ufffe' # 0xAA -> UNDEFINED - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u2015' # 0xAF -> HORIZONTAL BAR - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u0384' # 0xB4 -> GREEK TONOS - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU - u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU - u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI - u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO - u'\ufffe' # 0xD2 -> UNDEFINED - u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA - u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA - u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA - u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU - u'\u03bd' # 0xED -> GREEK SMALL LETTER NU - u'\u03be' # 0xEE -> GREEK SMALL LETTER XI - u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI - u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO - u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA - u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI - u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA - u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\ufffe' # 0xFF -> UNDEFINED -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1254.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1254 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1254', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0160' # 0x8A -> LATIN CAPITAL LETTER S WITH CARON - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0161' # 0x9A -> LATIN SMALL LETTER S WITH CARON - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1255.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1255 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1255', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9C -> UNDEFINED - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\u20aa' # 0xA4 -> NEW SHEQEL SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xd7' # 0xAA -> MULTIPLICATION SIGN - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xf7' # 0xBA -> DIVISION SIGN - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\u05b0' # 0xC0 -> HEBREW POINT SHEVA - u'\u05b1' # 0xC1 -> HEBREW POINT HATAF SEGOL - u'\u05b2' # 0xC2 -> HEBREW POINT HATAF PATAH - u'\u05b3' # 0xC3 -> HEBREW POINT HATAF QAMATS - u'\u05b4' # 0xC4 -> HEBREW POINT HIRIQ - u'\u05b5' # 0xC5 -> HEBREW POINT TSERE - u'\u05b6' # 0xC6 -> HEBREW POINT SEGOL - u'\u05b7' # 0xC7 -> HEBREW POINT PATAH - u'\u05b8' # 0xC8 -> HEBREW POINT QAMATS - u'\u05b9' # 0xC9 -> HEBREW POINT HOLAM - u'\ufffe' # 0xCA -> UNDEFINED - u'\u05bb' # 0xCB -> HEBREW POINT QUBUTS - u'\u05bc' # 0xCC -> HEBREW POINT DAGESH OR MAPIQ - u'\u05bd' # 0xCD -> HEBREW POINT METEG - u'\u05be' # 0xCE -> HEBREW PUNCTUATION MAQAF - u'\u05bf' # 0xCF -> HEBREW POINT RAFE - u'\u05c0' # 0xD0 -> HEBREW PUNCTUATION PASEQ - u'\u05c1' # 0xD1 -> HEBREW POINT SHIN DOT - u'\u05c2' # 0xD2 -> HEBREW POINT SIN DOT - u'\u05c3' # 0xD3 -> HEBREW PUNCTUATION SOF PASUQ - u'\u05f0' # 0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV - u'\u05f1' # 0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD - u'\u05f2' # 0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD - u'\u05f3' # 0xD7 -> HEBREW PUNCTUATION GERESH - u'\u05f4' # 0xD8 -> HEBREW PUNCTUATION GERSHAYIM - u'\ufffe' # 0xD9 -> UNDEFINED - u'\ufffe' # 0xDA -> UNDEFINED - u'\ufffe' # 0xDB -> UNDEFINED - u'\ufffe' # 0xDC -> UNDEFINED - u'\ufffe' # 0xDD -> UNDEFINED - u'\ufffe' # 0xDE -> UNDEFINED - u'\ufffe' # 0xDF -> UNDEFINED - u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF - u'\u05d1' # 0xE1 -> HEBREW LETTER BET - u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL - u'\u05d3' # 0xE3 -> HEBREW LETTER DALET - u'\u05d4' # 0xE4 -> HEBREW LETTER HE - u'\u05d5' # 0xE5 -> HEBREW LETTER VAV - u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0xE7 -> HEBREW LETTER HET - u'\u05d8' # 0xE8 -> HEBREW LETTER TET - u'\u05d9' # 0xE9 -> HEBREW LETTER YOD - u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF - u'\u05db' # 0xEB -> HEBREW LETTER KAF - u'\u05dc' # 0xEC -> HEBREW LETTER LAMED - u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM - u'\u05de' # 0xEE -> HEBREW LETTER MEM - u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0xF0 -> HEBREW LETTER NUN - u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN - u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0xF4 -> HEBREW LETTER PE - u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI - u'\u05e7' # 0xF7 -> HEBREW LETTER QOF - u'\u05e8' # 0xF8 -> HEBREW LETTER RESH - u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN - u'\u05ea' # 0xFA -> HEBREW LETTER TAV - u'\ufffe' # 0xFB -> UNDEFINED - u'\ufffe' # 0xFC -> UNDEFINED - u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK - u'\ufffe' # 0xFF -> UNDEFINED -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1256.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1256 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1256', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\u067e' # 0x81 -> ARABIC LETTER PEH - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\u0679' # 0x8A -> ARABIC LETTER TTEH - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\u0686' # 0x8D -> ARABIC LETTER TCHEH - u'\u0698' # 0x8E -> ARABIC LETTER JEH - u'\u0688' # 0x8F -> ARABIC LETTER DDAL - u'\u06af' # 0x90 -> ARABIC LETTER GAF - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u06a9' # 0x98 -> ARABIC LETTER KEHEH - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\u0691' # 0x9A -> ARABIC LETTER RREH - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\u200c' # 0x9D -> ZERO WIDTH NON-JOINER - u'\u200d' # 0x9E -> ZERO WIDTH JOINER - u'\u06ba' # 0x9F -> ARABIC LETTER NOON GHUNNA - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u060c' # 0xA1 -> ARABIC COMMA - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u06be' # 0xAA -> ARABIC LETTER HEH DOACHASHMEE - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\u061b' # 0xBA -> ARABIC SEMICOLON - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\u061f' # 0xBF -> ARABIC QUESTION MARK - u'\u06c1' # 0xC0 -> ARABIC LETTER HEH GOAL - u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xC7 -> ARABIC LETTER ALEF - u'\u0628' # 0xC8 -> ARABIC LETTER BEH - u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xCA -> ARABIC LETTER TEH - u'\u062b' # 0xCB -> ARABIC LETTER THEH - u'\u062c' # 0xCC -> ARABIC LETTER JEEM - u'\u062d' # 0xCD -> ARABIC LETTER HAH - u'\u062e' # 0xCE -> ARABIC LETTER KHAH - u'\u062f' # 0xCF -> ARABIC LETTER DAL - u'\u0630' # 0xD0 -> ARABIC LETTER THAL - u'\u0631' # 0xD1 -> ARABIC LETTER REH - u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xD3 -> ARABIC LETTER SEEN - u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xD5 -> ARABIC LETTER SAD - u'\u0636' # 0xD6 -> ARABIC LETTER DAD - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0637' # 0xD8 -> ARABIC LETTER TAH - u'\u0638' # 0xD9 -> ARABIC LETTER ZAH - u'\u0639' # 0xDA -> ARABIC LETTER AIN - u'\u063a' # 0xDB -> ARABIC LETTER GHAIN - u'\u0640' # 0xDC -> ARABIC TATWEEL - u'\u0641' # 0xDD -> ARABIC LETTER FEH - u'\u0642' # 0xDE -> ARABIC LETTER QAF - u'\u0643' # 0xDF -> ARABIC LETTER KAF - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\u0644' # 0xE1 -> ARABIC LETTER LAM - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0645' # 0xE3 -> ARABIC LETTER MEEM - u'\u0646' # 0xE4 -> ARABIC LETTER NOON - u'\u0647' # 0xE5 -> ARABIC LETTER HEH - u'\u0648' # 0xE6 -> ARABIC LETTER WAW - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0649' # 0xEC -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xED -> ARABIC LETTER YEH - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u064b' # 0xF0 -> ARABIC FATHATAN - u'\u064c' # 0xF1 -> ARABIC DAMMATAN - u'\u064d' # 0xF2 -> ARABIC KASRATAN - u'\u064e' # 0xF3 -> ARABIC FATHA - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u064f' # 0xF5 -> ARABIC DAMMA - u'\u0650' # 0xF6 -> ARABIC KASRA - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0651' # 0xF8 -> ARABIC SHADDA - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\u0652' # 0xFA -> ARABIC SUKUN - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK - u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1257.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1257 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1257', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\ufffe' # 0x83 -> UNDEFINED - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\ufffe' # 0x88 -> UNDEFINED - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\xa8' # 0x8D -> DIAERESIS - u'\u02c7' # 0x8E -> CARON - u'\xb8' # 0x8F -> CEDILLA - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufffe' # 0x9C -> UNDEFINED - u'\xaf' # 0x9D -> MACRON - u'\u02db' # 0x9E -> OGONEK - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\ufffe' # 0xA1 -> UNDEFINED - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\ufffe' # 0xA5 -> UNDEFINED - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xe6' # 0xBF -> LATIN SMALL LETTER AE - u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON - u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK - u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK - u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON - u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK - u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE - u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA - u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON - u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK - u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE - u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE - u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON - u'\u02d9' # 0xFF -> DOT ABOVE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp1258.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp1258 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp1258', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\u201a' # 0x82 -> SINGLE LOW-9 QUOTATION MARK - u'\u0192' # 0x83 -> LATIN SMALL LETTER F WITH HOOK - u'\u201e' # 0x84 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\u2020' # 0x86 -> DAGGER - u'\u2021' # 0x87 -> DOUBLE DAGGER - u'\u02c6' # 0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u2030' # 0x89 -> PER MILLE SIGN - u'\ufffe' # 0x8A -> UNDEFINED - u'\u2039' # 0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u0152' # 0x8C -> LATIN CAPITAL LIGATURE OE - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\u02dc' # 0x98 -> SMALL TILDE - u'\u2122' # 0x99 -> TRADE MARK SIGN - u'\ufffe' # 0x9A -> UNDEFINED - u'\u203a' # 0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0153' # 0x9C -> LATIN SMALL LIGATURE OE - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\u0178' # 0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0300' # 0xCC -> COMBINING GRAVE ACCENT - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\u0309' # 0xD2 -> COMBINING HOOK ABOVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u01a0' # 0xD5 -> LATIN CAPITAL LETTER O WITH HORN - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u01af' # 0xDD -> LATIN CAPITAL LETTER U WITH HORN - u'\u0303' # 0xDE -> COMBINING TILDE - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0301' # 0xEC -> COMBINING ACUTE ACCENT - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\u0323' # 0xF2 -> COMBINING DOT BELOW - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u01a1' # 0xF5 -> LATIN SMALL LETTER O WITH HORN - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u01b0' # 0xFD -> LATIN SMALL LETTER U WITH HORN - u'\u20ab' # 0xFE -> DONG SIGN - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp424.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp424 generated from 'MAPPINGS/VENDORS/MISC/CP424.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp424', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> SELECT - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> REQUIRED NEW LINE - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> GRAPHIC ESCAPE - u'\x8d' # 0x09 -> SUPERSCRIPT - u'\x8e' # 0x0A -> REPEAT - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> RESTORE/ENABLE PRESENTATION - u'\x85' # 0x15 -> NEW LINE - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> PROGRAM OPERATOR COMMUNICATION - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> UNIT BACK SPACE - u'\x8f' # 0x1B -> CUSTOMER USE ONE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> DIGIT SELECT - u'\x81' # 0x21 -> START OF SIGNIFICANCE - u'\x82' # 0x22 -> FIELD SEPARATOR - u'\x83' # 0x23 -> WORD UNDERSCORE - u'\x84' # 0x24 -> BYPASS OR INHIBIT PRESENTATION - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> SET ATTRIBUTE - u'\x89' # 0x29 -> START FIELD EXTENDED - u'\x8a' # 0x2A -> SET MODE OR SWITCH - u'\x8b' # 0x2B -> CONTROL SEQUENCE PREFIX - u'\x8c' # 0x2C -> MODIFY FIELD ATTRIBUTE - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> <reserved> - u'\x91' # 0x31 -> <reserved> - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> INDEX RETURN - u'\x94' # 0x34 -> PRESENTATION POSITION - u'\x95' # 0x35 -> TRANSPARENT - u'\x96' # 0x36 -> NUMERIC BACKSPACE - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> SUBSCRIPT - u'\x99' # 0x39 -> INDENT TABULATION - u'\x9a' # 0x3A -> REVERSE FORM FEED - u'\x9b' # 0x3B -> CUSTOMER USE THREE - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> <reserved> - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\u05d0' # 0x41 -> HEBREW LETTER ALEF - u'\u05d1' # 0x42 -> HEBREW LETTER BET - u'\u05d2' # 0x43 -> HEBREW LETTER GIMEL - u'\u05d3' # 0x44 -> HEBREW LETTER DALET - u'\u05d4' # 0x45 -> HEBREW LETTER HE - u'\u05d5' # 0x46 -> HEBREW LETTER VAV - u'\u05d6' # 0x47 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0x48 -> HEBREW LETTER HET - u'\u05d8' # 0x49 -> HEBREW LETTER TET - u'\xa2' # 0x4A -> CENT SIGN - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'|' # 0x4F -> VERTICAL LINE - u'&' # 0x50 -> AMPERSAND - u'\u05d9' # 0x51 -> HEBREW LETTER YOD - u'\u05da' # 0x52 -> HEBREW LETTER FINAL KAF - u'\u05db' # 0x53 -> HEBREW LETTER KAF - u'\u05dc' # 0x54 -> HEBREW LETTER LAMED - u'\u05dd' # 0x55 -> HEBREW LETTER FINAL MEM - u'\u05de' # 0x56 -> HEBREW LETTER MEM - u'\u05df' # 0x57 -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0x58 -> HEBREW LETTER NUN - u'\u05e1' # 0x59 -> HEBREW LETTER SAMEKH - u'!' # 0x5A -> EXCLAMATION MARK - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'\xac' # 0x5F -> NOT SIGN - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\u05e2' # 0x62 -> HEBREW LETTER AYIN - u'\u05e3' # 0x63 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0x64 -> HEBREW LETTER PE - u'\u05e5' # 0x65 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0x66 -> HEBREW LETTER TSADI - u'\u05e7' # 0x67 -> HEBREW LETTER QOF - u'\u05e8' # 0x68 -> HEBREW LETTER RESH - u'\u05e9' # 0x69 -> HEBREW LETTER SHIN - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' # 0x6F -> QUESTION MARK - u'\ufffe' # 0x70 -> UNDEFINED - u'\u05ea' # 0x71 -> HEBREW LETTER TAV - u'\ufffe' # 0x72 -> UNDEFINED - u'\ufffe' # 0x73 -> UNDEFINED - u'\xa0' # 0x74 -> NO-BREAK SPACE - u'\ufffe' # 0x75 -> UNDEFINED - u'\ufffe' # 0x76 -> UNDEFINED - u'\ufffe' # 0x77 -> UNDEFINED - u'\u2017' # 0x78 -> DOUBLE LOW LINE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\ufffe' # 0x80 -> UNDEFINED - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\ufffe' # 0x9A -> UNDEFINED - u'\ufffe' # 0x9B -> UNDEFINED - u'\ufffe' # 0x9C -> UNDEFINED - u'\xb8' # 0x9D -> CEDILLA - u'\ufffe' # 0x9E -> UNDEFINED - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\ufffe' # 0xAA -> UNDEFINED - u'\ufffe' # 0xAB -> UNDEFINED - u'\ufffe' # 0xAC -> UNDEFINED - u'\ufffe' # 0xAD -> UNDEFINED - u'\ufffe' # 0xAE -> UNDEFINED - u'\xae' # 0xAF -> REGISTERED SIGN - u'^' # 0xB0 -> CIRCUMFLEX ACCENT - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'[' # 0xBA -> LEFT SQUARE BRACKET - u']' # 0xBB -> RIGHT SQUARE BRACKET - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\ufffe' # 0xCB -> UNDEFINED - u'\ufffe' # 0xCC -> UNDEFINED - u'\ufffe' # 0xCD -> UNDEFINED - u'\ufffe' # 0xCE -> UNDEFINED - u'\ufffe' # 0xCF -> UNDEFINED - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\ufffe' # 0xDB -> UNDEFINED - u'\ufffe' # 0xDC -> UNDEFINED - u'\ufffe' # 0xDD -> UNDEFINED - u'\ufffe' # 0xDE -> UNDEFINED - u'\ufffe' # 0xDF -> UNDEFINED - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\ufffe' # 0xEB -> UNDEFINED - u'\ufffe' # 0xEC -> UNDEFINED - u'\ufffe' # 0xED -> UNDEFINED - u'\ufffe' # 0xEE -> UNDEFINED - u'\ufffe' # 0xEF -> UNDEFINED - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\ufffe' # 0xFB -> UNDEFINED - u'\ufffe' # 0xFC -> UNDEFINED - u'\ufffe' # 0xFD -> UNDEFINED - u'\ufffe' # 0xFE -> UNDEFINED - u'\x9f' # 0xFF -> EIGHT ONES -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp437.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec cp437 generated from 'VENDORS/MICSFT/PC/CP437.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp437', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xa5' # 0x009d -> YEN SIGN - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a5: 0x009d, # YEN SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp500.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp500 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp500', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\xa0' # 0x41 -> NO-BREAK SPACE - u'\xe2' # 0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x43 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x44 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0x45 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe3' # 0x46 -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x47 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x48 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xf1' # 0x49 -> LATIN SMALL LETTER N WITH TILDE - u'[' # 0x4A -> LEFT SQUARE BRACKET - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'!' # 0x4F -> EXCLAMATION MARK - u'&' # 0x50 -> AMPERSAND - u'\xe9' # 0x51 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x53 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x54 -> LATIN SMALL LETTER E WITH GRAVE - u'\xed' # 0x55 -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x57 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xec' # 0x58 -> LATIN SMALL LETTER I WITH GRAVE - u'\xdf' # 0x59 -> LATIN SMALL LETTER SHARP S (GERMAN) - u']' # 0x5A -> RIGHT SQUARE BRACKET - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'^' # 0x5F -> CIRCUMFLEX ACCENT - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\xc2' # 0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc4' # 0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc0' # 0x64 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0x65 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc3' # 0x66 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc5' # 0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xd1' # 0x69 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xa6' # 0x6A -> BROKEN BAR - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' # 0x6F -> QUESTION MARK - u'\xf8' # 0x70 -> LATIN SMALL LETTER O WITH STROKE - u'\xc9' # 0x71 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x74 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0x75 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0x78 -> LATIN CAPITAL LETTER I WITH GRAVE - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\xd8' # 0x80 -> LATIN CAPITAL LETTER O WITH STROKE - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\xab' # 0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xf0' # 0x8C -> LATIN SMALL LETTER ETH (ICELANDIC) - u'\xfd' # 0x8D -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0x8E -> LATIN SMALL LETTER THORN (ICELANDIC) - u'\xb1' # 0x8F -> PLUS-MINUS SIGN - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\xaa' # 0x9A -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x9B -> MASCULINE ORDINAL INDICATOR - u'\xe6' # 0x9C -> LATIN SMALL LIGATURE AE - u'\xb8' # 0x9D -> CEDILLA - u'\xc6' # 0x9E -> LATIN CAPITAL LIGATURE AE - u'\xa4' # 0x9F -> CURRENCY SIGN - u'\xb5' # 0xA0 -> MICRO SIGN - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\xa1' # 0xAA -> INVERTED EXCLAMATION MARK - u'\xbf' # 0xAB -> INVERTED QUESTION MARK - u'\xd0' # 0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC) - u'\xdd' # 0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC) - u'\xae' # 0xAF -> REGISTERED SIGN - u'\xa2' # 0xB0 -> CENT SIGN - u'\xa3' # 0xB1 -> POUND SIGN - u'\xa5' # 0xB2 -> YEN SIGN - u'\xb7' # 0xB3 -> MIDDLE DOT - u'\xa9' # 0xB4 -> COPYRIGHT SIGN - u'\xa7' # 0xB5 -> SECTION SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xbc' # 0xB7 -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xB8 -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xB9 -> VULGAR FRACTION THREE QUARTERS - u'\xac' # 0xBA -> NOT SIGN - u'|' # 0xBB -> VERTICAL LINE - u'\xaf' # 0xBC -> MACRON - u'\xa8' # 0xBD -> DIAERESIS - u'\xb4' # 0xBE -> ACUTE ACCENT - u'\xd7' # 0xBF -> MULTIPLICATION SIGN - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\xf4' # 0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0xCC -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0xCD -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xCE -> LATIN SMALL LETTER O WITH ACUTE - u'\xf5' # 0xCF -> LATIN SMALL LETTER O WITH TILDE - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb9' # 0xDA -> SUPERSCRIPT ONE - u'\xfb' # 0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xDC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xf9' # 0xDD -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xDE -> LATIN SMALL LETTER U WITH ACUTE - u'\xff' # 0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\xf7' # 0xE1 -> DIVISION SIGN - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xd4' # 0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd6' # 0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd2' # 0xED -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd5' # 0xEF -> LATIN CAPITAL LETTER O WITH TILDE - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xdb' # 0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xd9' # 0xFD -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xFE -> LATIN CAPITAL LETTER U WITH ACUTE - u'\x9f' # 0xFF -> CONTROL -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp720.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,309 +0,0 @@ -"""Python Character Mapping Codec cp720 generated on Windows: -Vista 6.0.6002 SP2 Multiprocessor Free with the command: - python Tools/unicode/genwincodec.py 720 -"""#" - - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp720', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\x80' - u'\x81' - u'\xe9' # 0x82 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x83 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\x84' - u'\xe0' # 0x85 -> LATIN SMALL LETTER A WITH GRAVE - u'\x86' - u'\xe7' # 0x87 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x88 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x89 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x8A -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x8B -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x8C -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\x8d' - u'\x8e' - u'\x8f' - u'\x90' - u'\u0651' # 0x91 -> ARABIC SHADDA - u'\u0652' # 0x92 -> ARABIC SUKUN - u'\xf4' # 0x93 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xa4' # 0x94 -> CURRENCY SIGN - u'\u0640' # 0x95 -> ARABIC TATWEEL - u'\xfb' # 0x96 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x97 -> LATIN SMALL LETTER U WITH GRAVE - u'\u0621' # 0x98 -> ARABIC LETTER HAMZA - u'\u0622' # 0x99 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0x9A -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0x9B -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\xa3' # 0x9C -> POUND SIGN - u'\u0625' # 0x9D -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0x9E -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0x9F -> ARABIC LETTER ALEF - u'\u0628' # 0xA0 -> ARABIC LETTER BEH - u'\u0629' # 0xA1 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xA2 -> ARABIC LETTER TEH - u'\u062b' # 0xA3 -> ARABIC LETTER THEH - u'\u062c' # 0xA4 -> ARABIC LETTER JEEM - u'\u062d' # 0xA5 -> ARABIC LETTER HAH - u'\u062e' # 0xA6 -> ARABIC LETTER KHAH - u'\u062f' # 0xA7 -> ARABIC LETTER DAL - u'\u0630' # 0xA8 -> ARABIC LETTER THAL - u'\u0631' # 0xA9 -> ARABIC LETTER REH - u'\u0632' # 0xAA -> ARABIC LETTER ZAIN - u'\u0633' # 0xAB -> ARABIC LETTER SEEN - u'\u0634' # 0xAC -> ARABIC LETTER SHEEN - u'\u0635' # 0xAD -> ARABIC LETTER SAD - u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0xB0 -> LIGHT SHADE - u'\u2592' # 0xB1 -> MEDIUM SHADE - u'\u2593' # 0xB2 -> DARK SHADE - u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0xB5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0xB6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0xB8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0xBD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0xBE -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0xC6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0xC7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0xCF -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0xD0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0xD1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0xD2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0xD3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0xD4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0xD5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0xD6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0xD7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0xD8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0xDB -> FULL BLOCK - u'\u2584' # 0xDC -> LOWER HALF BLOCK - u'\u258c' # 0xDD -> LEFT HALF BLOCK - u'\u2590' # 0xDE -> RIGHT HALF BLOCK - u'\u2580' # 0xDF -> UPPER HALF BLOCK - u'\u0636' # 0xE0 -> ARABIC LETTER DAD - u'\u0637' # 0xE1 -> ARABIC LETTER TAH - u'\u0638' # 0xE2 -> ARABIC LETTER ZAH - u'\u0639' # 0xE3 -> ARABIC LETTER AIN - u'\u063a' # 0xE4 -> ARABIC LETTER GHAIN - u'\u0641' # 0xE5 -> ARABIC LETTER FEH - u'\xb5' # 0xE6 -> MICRO SIGN - u'\u0642' # 0xE7 -> ARABIC LETTER QAF - u'\u0643' # 0xE8 -> ARABIC LETTER KAF - u'\u0644' # 0xE9 -> ARABIC LETTER LAM - u'\u0645' # 0xEA -> ARABIC LETTER MEEM - u'\u0646' # 0xEB -> ARABIC LETTER NOON - u'\u0647' # 0xEC -> ARABIC LETTER HEH - u'\u0648' # 0xED -> ARABIC LETTER WAW - u'\u0649' # 0xEE -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xEF -> ARABIC LETTER YEH - u'\u2261' # 0xF0 -> IDENTICAL TO - u'\u064b' # 0xF1 -> ARABIC FATHATAN - u'\u064c' # 0xF2 -> ARABIC DAMMATAN - u'\u064d' # 0xF3 -> ARABIC KASRATAN - u'\u064e' # 0xF4 -> ARABIC FATHA - u'\u064f' # 0xF5 -> ARABIC DAMMA - u'\u0650' # 0xF6 -> ARABIC KASRA - u'\u2248' # 0xF7 -> ALMOST EQUAL TO - u'\xb0' # 0xF8 -> DEGREE SIGN - u'\u2219' # 0xF9 -> BULLET OPERATOR - u'\xb7' # 0xFA -> MIDDLE DOT - u'\u221a' # 0xFB -> SQUARE ROOT - u'\u207f' # 0xFC -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0xFD -> SUPERSCRIPT TWO - u'\u25a0' # 0xFE -> BLACK SQUARE - u'\xa0' # 0xFF -> NO-BREAK SPACE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp737.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec cp737 generated from 'VENDORS/MICSFT/PC/CP737.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp737', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x0081: 0x0392, # GREEK CAPITAL LETTER BETA - 0x0082: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x0083: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x0084: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x0085: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x0086: 0x0397, # GREEK CAPITAL LETTER ETA - 0x0087: 0x0398, # GREEK CAPITAL LETTER THETA - 0x0088: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x0089: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x008a: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x008b: 0x039c, # GREEK CAPITAL LETTER MU - 0x008c: 0x039d, # GREEK CAPITAL LETTER NU - 0x008d: 0x039e, # GREEK CAPITAL LETTER XI - 0x008e: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x008f: 0x03a0, # GREEK CAPITAL LETTER PI - 0x0090: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x0091: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x0092: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x0093: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x0094: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x0095: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x0096: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x0097: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x0098: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x0099: 0x03b2, # GREEK SMALL LETTER BETA - 0x009a: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x009b: 0x03b4, # GREEK SMALL LETTER DELTA - 0x009c: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x009d: 0x03b6, # GREEK SMALL LETTER ZETA - 0x009e: 0x03b7, # GREEK SMALL LETTER ETA - 0x009f: 0x03b8, # GREEK SMALL LETTER THETA - 0x00a0: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00a1: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00a2: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00a3: 0x03bc, # GREEK SMALL LETTER MU - 0x00a4: 0x03bd, # GREEK SMALL LETTER NU - 0x00a5: 0x03be, # GREEK SMALL LETTER XI - 0x00a6: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00a7: 0x03c0, # GREEK SMALL LETTER PI - 0x00a8: 0x03c1, # GREEK SMALL LETTER RHO - 0x00a9: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00aa: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ab: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ac: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00ad: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ae: 0x03c7, # GREEK SMALL LETTER CHI - 0x00af: 0x03c8, # GREEK SMALL LETTER PSI - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00e1: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x00e2: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x00e3: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x00e4: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00e5: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00e6: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00e7: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00e8: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00e9: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00ea: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x00eb: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x00ec: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x00ed: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x00ee: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x00ef: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x00f0: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x00f5: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0391' # 0x0080 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0x0081 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0x0082 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0x0083 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0x0084 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0x0085 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0x0086 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0x0087 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0x0088 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0x0089 -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0x008a -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0x008b -> GREEK CAPITAL LETTER MU - u'\u039d' # 0x008c -> GREEK CAPITAL LETTER NU - u'\u039e' # 0x008d -> GREEK CAPITAL LETTER XI - u'\u039f' # 0x008e -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0x008f -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0x0090 -> GREEK CAPITAL LETTER RHO - u'\u03a3' # 0x0091 -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0x0092 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0x0093 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0x0094 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0x0095 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0x0096 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0x0097 -> GREEK CAPITAL LETTER OMEGA - u'\u03b1' # 0x0098 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0x0099 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0x009a -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0x009b -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0x009c -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0x009d -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0x009e -> GREEK SMALL LETTER ETA - u'\u03b8' # 0x009f -> GREEK SMALL LETTER THETA - u'\u03b9' # 0x00a0 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0x00a1 -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0x00a2 -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0x00a3 -> GREEK SMALL LETTER MU - u'\u03bd' # 0x00a4 -> GREEK SMALL LETTER NU - u'\u03be' # 0x00a5 -> GREEK SMALL LETTER XI - u'\u03bf' # 0x00a6 -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0x00a7 -> GREEK SMALL LETTER PI - u'\u03c1' # 0x00a8 -> GREEK SMALL LETTER RHO - u'\u03c3' # 0x00a9 -> GREEK SMALL LETTER SIGMA - u'\u03c2' # 0x00aa -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c4' # 0x00ab -> GREEK SMALL LETTER TAU - u'\u03c5' # 0x00ac -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0x00ad -> GREEK SMALL LETTER PHI - u'\u03c7' # 0x00ae -> GREEK SMALL LETTER CHI - u'\u03c8' # 0x00af -> GREEK SMALL LETTER PSI - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03c9' # 0x00e0 -> GREEK SMALL LETTER OMEGA - u'\u03ac' # 0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0x00e3 -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03ca' # 0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03af' # 0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03cc' # 0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03cb' # 0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03ce' # 0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u0386' # 0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\u0388' # 0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u038c' # 0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\u038e' # 0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u03aa' # 0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00f7: 0x00f6, # DIVISION SIGN - 0x0386: 0x00ea, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0x00eb, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0x00ec, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038a: 0x00ed, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038c: 0x00ee, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038e: 0x00ef, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038f: 0x00f0, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0391: 0x0080, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0x0081, # GREEK CAPITAL LETTER BETA - 0x0393: 0x0082, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0x0083, # GREEK CAPITAL LETTER DELTA - 0x0395: 0x0084, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0x0085, # GREEK CAPITAL LETTER ZETA - 0x0397: 0x0086, # GREEK CAPITAL LETTER ETA - 0x0398: 0x0087, # GREEK CAPITAL LETTER THETA - 0x0399: 0x0088, # GREEK CAPITAL LETTER IOTA - 0x039a: 0x0089, # GREEK CAPITAL LETTER KAPPA - 0x039b: 0x008a, # GREEK CAPITAL LETTER LAMDA - 0x039c: 0x008b, # GREEK CAPITAL LETTER MU - 0x039d: 0x008c, # GREEK CAPITAL LETTER NU - 0x039e: 0x008d, # GREEK CAPITAL LETTER XI - 0x039f: 0x008e, # GREEK CAPITAL LETTER OMICRON - 0x03a0: 0x008f, # GREEK CAPITAL LETTER PI - 0x03a1: 0x0090, # GREEK CAPITAL LETTER RHO - 0x03a3: 0x0091, # GREEK CAPITAL LETTER SIGMA - 0x03a4: 0x0092, # GREEK CAPITAL LETTER TAU - 0x03a5: 0x0093, # GREEK CAPITAL LETTER UPSILON - 0x03a6: 0x0094, # GREEK CAPITAL LETTER PHI - 0x03a7: 0x0095, # GREEK CAPITAL LETTER CHI - 0x03a8: 0x0096, # GREEK CAPITAL LETTER PSI - 0x03a9: 0x0097, # GREEK CAPITAL LETTER OMEGA - 0x03aa: 0x00f4, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03ab: 0x00f5, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03ac: 0x00e1, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03ad: 0x00e2, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03ae: 0x00e3, # GREEK SMALL LETTER ETA WITH TONOS - 0x03af: 0x00e5, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03b1: 0x0098, # GREEK SMALL LETTER ALPHA - 0x03b2: 0x0099, # GREEK SMALL LETTER BETA - 0x03b3: 0x009a, # GREEK SMALL LETTER GAMMA - 0x03b4: 0x009b, # GREEK SMALL LETTER DELTA - 0x03b5: 0x009c, # GREEK SMALL LETTER EPSILON - 0x03b6: 0x009d, # GREEK SMALL LETTER ZETA - 0x03b7: 0x009e, # GREEK SMALL LETTER ETA - 0x03b8: 0x009f, # GREEK SMALL LETTER THETA - 0x03b9: 0x00a0, # GREEK SMALL LETTER IOTA - 0x03ba: 0x00a1, # GREEK SMALL LETTER KAPPA - 0x03bb: 0x00a2, # GREEK SMALL LETTER LAMDA - 0x03bc: 0x00a3, # GREEK SMALL LETTER MU - 0x03bd: 0x00a4, # GREEK SMALL LETTER NU - 0x03be: 0x00a5, # GREEK SMALL LETTER XI - 0x03bf: 0x00a6, # GREEK SMALL LETTER OMICRON - 0x03c0: 0x00a7, # GREEK SMALL LETTER PI - 0x03c1: 0x00a8, # GREEK SMALL LETTER RHO - 0x03c2: 0x00aa, # GREEK SMALL LETTER FINAL SIGMA - 0x03c3: 0x00a9, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00ab, # GREEK SMALL LETTER TAU - 0x03c5: 0x00ac, # GREEK SMALL LETTER UPSILON - 0x03c6: 0x00ad, # GREEK SMALL LETTER PHI - 0x03c7: 0x00ae, # GREEK SMALL LETTER CHI - 0x03c8: 0x00af, # GREEK SMALL LETTER PSI - 0x03c9: 0x00e0, # GREEK SMALL LETTER OMEGA - 0x03ca: 0x00e4, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03cb: 0x00e8, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03cc: 0x00e6, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03cd: 0x00e7, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03ce: 0x00e9, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp775.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,697 +0,0 @@ -""" Python Character Mapping Codec cp775 generated from 'VENDORS/MICSFT/PC/CP775.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp775', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x008a: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x008b: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x008c: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0096: 0x00a2, # CENT SIGN - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x00a4, # CURRENCY SIGN - 0x00a0: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x00a1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00a4: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00a5: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00a6: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00a7: 0x00a6, # BROKEN BAR - 0x00a8: 0x00a9, # COPYRIGHT SIGN - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00b6: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00b7: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00b8: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00be: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00c7: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00d0: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00d1: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00d2: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00d3: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x00d4: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00d5: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00d6: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00d7: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00d8: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e8: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00e9: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00ea: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00eb: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00ec: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00ed: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x00ee: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00ef: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0106' # 0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0101' # 0x0083 -> LATIN SMALL LETTER A WITH MACRON - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u0123' # 0x0085 -> LATIN SMALL LETTER G WITH CEDILLA - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\u0107' # 0x0087 -> LATIN SMALL LETTER C WITH ACUTE - u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE - u'\u0113' # 0x0089 -> LATIN SMALL LETTER E WITH MACRON - u'\u0156' # 0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\u0157' # 0x008b -> LATIN SMALL LETTER R WITH CEDILLA - u'\u012b' # 0x008c -> LATIN SMALL LETTER I WITH MACRON - u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\u014d' # 0x0093 -> LATIN SMALL LETTER O WITH MACRON - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u0122' # 0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\xa2' # 0x0096 -> CENT SIGN - u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd7' # 0x009e -> MULTIPLICATION SIGN - u'\xa4' # 0x009f -> CURRENCY SIGN - u'\u0100' # 0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u012a' # 0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\u017b' # 0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017c' # 0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u017a' # 0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE - u'\u201d' # 0x00a6 -> RIGHT DOUBLE QUOTATION MARK - u'\xa6' # 0x00a7 -> BROKEN BAR - u'\xa9' # 0x00a8 -> COPYRIGHT SIGN - u'\xae' # 0x00a9 -> REGISTERED SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\u0141' # 0x00ad -> LATIN CAPITAL LETTER L WITH STROKE - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u0104' # 0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u010c' # 0x00b6 -> LATIN CAPITAL LETTER C WITH CARON - u'\u0118' # 0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0116' # 0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u012e' # 0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u0160' # 0x00be -> LATIN CAPITAL LETTER S WITH CARON - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u0172' # 0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u016a' # 0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u017d' # 0x00cf -> LATIN CAPITAL LETTER Z WITH CARON - u'\u0105' # 0x00d0 -> LATIN SMALL LETTER A WITH OGONEK - u'\u010d' # 0x00d1 -> LATIN SMALL LETTER C WITH CARON - u'\u0119' # 0x00d2 -> LATIN SMALL LETTER E WITH OGONEK - u'\u0117' # 0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\u012f' # 0x00d4 -> LATIN SMALL LETTER I WITH OGONEK - u'\u0161' # 0x00d5 -> LATIN SMALL LETTER S WITH CARON - u'\u0173' # 0x00d6 -> LATIN SMALL LETTER U WITH OGONEK - u'\u016b' # 0x00d7 -> LATIN SMALL LETTER U WITH MACRON - u'\u017e' # 0x00d8 -> LATIN SMALL LETTER Z WITH CARON - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'\u014c' # 0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON - u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE - u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u0144' # 0x00e7 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0136' # 0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u0137' # 0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA - u'\u013b' # 0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u013c' # 0x00eb -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0146' # 0x00ec -> LATIN SMALL LETTER N WITH CEDILLA - u'\u0112' # 0x00ed -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0145' # 0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u2019' # 0x00ef -> RIGHT SINGLE QUOTATION MARK - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u201c' # 0x00f2 -> LEFT DOUBLE QUOTATION MARK - u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0x00f4 -> PILCROW SIGN - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u201e' # 0x00f7 -> DOUBLE LOW-9 QUOTATION MARK - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\xb9' # 0x00fb -> SUPERSCRIPT ONE - u'\xb3' # 0x00fc -> SUPERSCRIPT THREE - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a2: 0x0096, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x009f, # CURRENCY SIGN - 0x00a6: 0x00a7, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a9: 0x00a8, # COPYRIGHT SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00ae: 0x00a9, # REGISTERED SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00fc, # SUPERSCRIPT THREE - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x00f4, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b9: 0x00fb, # SUPERSCRIPT ONE - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x009e, # MULTIPLICATION SIGN - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0100: 0x00a0, # LATIN CAPITAL LETTER A WITH MACRON - 0x0101: 0x0083, # LATIN SMALL LETTER A WITH MACRON - 0x0104: 0x00b5, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0x00d0, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0x0080, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0x0087, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0x00b6, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0x00d1, # LATIN SMALL LETTER C WITH CARON - 0x0112: 0x00ed, # LATIN CAPITAL LETTER E WITH MACRON - 0x0113: 0x0089, # LATIN SMALL LETTER E WITH MACRON - 0x0116: 0x00b8, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0117: 0x00d3, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0118: 0x00b7, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0x00d2, # LATIN SMALL LETTER E WITH OGONEK - 0x0122: 0x0095, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x0123: 0x0085, # LATIN SMALL LETTER G WITH CEDILLA - 0x012a: 0x00a1, # LATIN CAPITAL LETTER I WITH MACRON - 0x012b: 0x008c, # LATIN SMALL LETTER I WITH MACRON - 0x012e: 0x00bd, # LATIN CAPITAL LETTER I WITH OGONEK - 0x012f: 0x00d4, # LATIN SMALL LETTER I WITH OGONEK - 0x0136: 0x00e8, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x0137: 0x00e9, # LATIN SMALL LETTER K WITH CEDILLA - 0x013b: 0x00ea, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x013c: 0x00eb, # LATIN SMALL LETTER L WITH CEDILLA - 0x0141: 0x00ad, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0x00e7, # LATIN SMALL LETTER N WITH ACUTE - 0x0145: 0x00ee, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x0146: 0x00ec, # LATIN SMALL LETTER N WITH CEDILLA - 0x014c: 0x00e2, # LATIN CAPITAL LETTER O WITH MACRON - 0x014d: 0x0093, # LATIN SMALL LETTER O WITH MACRON - 0x0156: 0x008a, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x0157: 0x008b, # LATIN SMALL LETTER R WITH CEDILLA - 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE - 0x0160: 0x00be, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x00d5, # LATIN SMALL LETTER S WITH CARON - 0x016a: 0x00c7, # LATIN CAPITAL LETTER U WITH MACRON - 0x016b: 0x00d7, # LATIN SMALL LETTER U WITH MACRON - 0x0172: 0x00c6, # LATIN CAPITAL LETTER U WITH OGONEK - 0x0173: 0x00d6, # LATIN SMALL LETTER U WITH OGONEK - 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0x00a5, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0x00a3, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0x00a4, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017d: 0x00cf, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0x00d8, # LATIN SMALL LETTER Z WITH CARON - 0x2019: 0x00ef, # RIGHT SINGLE QUOTATION MARK - 0x201c: 0x00f2, # LEFT DOUBLE QUOTATION MARK - 0x201d: 0x00a6, # RIGHT DOUBLE QUOTATION MARK - 0x201e: 0x00f7, # DOUBLE LOW-9 QUOTATION MARK - 0x2219: 0x00f9, # BULLET OPERATOR - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp850.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP850.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp850', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH - 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN - 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2017, # DOUBLE LOW LINE - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd7' # 0x009e -> MULTIPLICATION SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\xae' # 0x00a9 -> REGISTERED SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xa9' # 0x00b8 -> COPYRIGHT SIGN - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0x00bd -> CENT SIGN - u'\xa5' # 0x00be -> YEN SIGN - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE - u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH - u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH - u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\u0131' # 0x00d5 -> LATIN SMALL LETTER DOTLESS I - u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\xa6' # 0x00dd -> BROKEN BAR - u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE - u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN - u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN - u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE - u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xaf' # 0x00ee -> MACRON - u'\xb4' # 0x00ef -> ACUTE ACCENT - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2017' # 0x00f2 -> DOUBLE LOW LINE - u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0x00f4 -> PILCROW SIGN - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\xb8' # 0x00f7 -> CEDILLA - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\xb9' # 0x00fb -> SUPERSCRIPT ONE - u'\xb3' # 0x00fc -> SUPERSCRIPT THREE - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x00bd, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a5: 0x00be, # YEN SIGN - 0x00a6: 0x00dd, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00a9: 0x00b8, # COPYRIGHT SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00ae: 0x00a9, # REGISTERED SIGN - 0x00af: 0x00ee, # MACRON - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00fc, # SUPERSCRIPT THREE - 0x00b4: 0x00ef, # ACUTE ACCENT - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x00f4, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b8: 0x00f7, # CEDILLA - 0x00b9: 0x00fb, # SUPERSCRIPT ONE - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x009e, # MULTIPLICATION SIGN - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN - 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0131: 0x00d5, # LATIN SMALL LETTER DOTLESS I - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x2017: 0x00f2, # DOUBLE LOW LINE - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp852.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP852.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp852', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0086: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x008b: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x0092: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x0096: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x0097: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x0098: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x009c: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x009d: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x00a5: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x00a6: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00a7: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00a8: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00a9: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x00ac: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x00ad: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x00b8: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00be: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x0102, # LATIN CAPITAL LETTER A WITH BREVE - 0x00c7: 0x0103, # LATIN SMALL LETTER A WITH BREVE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x0111, # LATIN SMALL LETTER D WITH STROKE - 0x00d1: 0x0110, # LATIN CAPITAL LETTER D WITH STROKE - 0x00d2: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x00d5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x0162, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x00de: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00e4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00e5: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00e6: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00e7: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00e8: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00eb: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x0163, # LATIN SMALL LETTER T WITH CEDILLA - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x02dd, # DOUBLE ACUTE ACCENT - 0x00f2: 0x02db, # OGONEK - 0x00f3: 0x02c7, # CARON - 0x00f4: 0x02d8, # BREVE - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x02d9, # DOT ABOVE - 0x00fb: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00fc: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00fd: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u016f' # 0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\u0107' # 0x0086 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\u0142' # 0x0088 -> LATIN SMALL LETTER L WITH STROKE - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0150' # 0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\u0151' # 0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u0179' # 0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0106' # 0x008f -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0139' # 0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u013a' # 0x0092 -> LATIN SMALL LETTER L WITH ACUTE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u013d' # 0x0095 -> LATIN CAPITAL LETTER L WITH CARON - u'\u013e' # 0x0096 -> LATIN SMALL LETTER L WITH CARON - u'\u015a' # 0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u015b' # 0x0098 -> LATIN SMALL LETTER S WITH ACUTE - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0164' # 0x009b -> LATIN CAPITAL LETTER T WITH CARON - u'\u0165' # 0x009c -> LATIN SMALL LETTER T WITH CARON - u'\u0141' # 0x009d -> LATIN CAPITAL LETTER L WITH STROKE - u'\xd7' # 0x009e -> MULTIPLICATION SIGN - u'\u010d' # 0x009f -> LATIN SMALL LETTER C WITH CARON - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\u0104' # 0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0105' # 0x00a5 -> LATIN SMALL LETTER A WITH OGONEK - u'\u017d' # 0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON - u'\u017e' # 0x00a7 -> LATIN SMALL LETTER Z WITH CARON - u'\u0118' # 0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0119' # 0x00a9 -> LATIN SMALL LETTER E WITH OGONEK - u'\xac' # 0x00aa -> NOT SIGN - u'\u017a' # 0x00ab -> LATIN SMALL LETTER Z WITH ACUTE - u'\u010c' # 0x00ac -> LATIN CAPITAL LETTER C WITH CARON - u'\u015f' # 0x00ad -> LATIN SMALL LETTER S WITH CEDILLA - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u011a' # 0x00b7 -> LATIN CAPITAL LETTER E WITH CARON - u'\u015e' # 0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u017b' # 0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017c' # 0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u0102' # 0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE - u'\u0103' # 0x00c7 -> LATIN SMALL LETTER A WITH BREVE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\u0111' # 0x00d0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0110' # 0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u010e' # 0x00d2 -> LATIN CAPITAL LETTER D WITH CARON - u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u010f' # 0x00d4 -> LATIN SMALL LETTER D WITH CARON - u'\u0147' # 0x00d5 -> LATIN CAPITAL LETTER N WITH CARON - u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u011b' # 0x00d8 -> LATIN SMALL LETTER E WITH CARON - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u0162' # 0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA - u'\u016e' # 0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0143' # 0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0144' # 0x00e4 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0148' # 0x00e5 -> LATIN SMALL LETTER N WITH CARON - u'\u0160' # 0x00e6 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0161' # 0x00e7 -> LATIN SMALL LETTER S WITH CARON - u'\u0154' # 0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u0155' # 0x00ea -> LATIN SMALL LETTER R WITH ACUTE - u'\u0170' # 0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE - u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0163' # 0x00ee -> LATIN SMALL LETTER T WITH CEDILLA - u'\xb4' # 0x00ef -> ACUTE ACCENT - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\u02dd' # 0x00f1 -> DOUBLE ACUTE ACCENT - u'\u02db' # 0x00f2 -> OGONEK - u'\u02c7' # 0x00f3 -> CARON - u'\u02d8' # 0x00f4 -> BREVE - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\xb8' # 0x00f7 -> CEDILLA - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\u02d9' # 0x00fa -> DOT ABOVE - u'\u0171' # 0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\u0158' # 0x00fc -> LATIN CAPITAL LETTER R WITH CARON - u'\u0159' # 0x00fd -> LATIN SMALL LETTER R WITH CARON - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b4: 0x00ef, # ACUTE ACCENT - 0x00b8: 0x00f7, # CEDILLA - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x009e, # MULTIPLICATION SIGN - 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE - 0x0102: 0x00c6, # LATIN CAPITAL LETTER A WITH BREVE - 0x0103: 0x00c7, # LATIN SMALL LETTER A WITH BREVE - 0x0104: 0x00a4, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0105: 0x00a5, # LATIN SMALL LETTER A WITH OGONEK - 0x0106: 0x008f, # LATIN CAPITAL LETTER C WITH ACUTE - 0x0107: 0x0086, # LATIN SMALL LETTER C WITH ACUTE - 0x010c: 0x00ac, # LATIN CAPITAL LETTER C WITH CARON - 0x010d: 0x009f, # LATIN SMALL LETTER C WITH CARON - 0x010e: 0x00d2, # LATIN CAPITAL LETTER D WITH CARON - 0x010f: 0x00d4, # LATIN SMALL LETTER D WITH CARON - 0x0110: 0x00d1, # LATIN CAPITAL LETTER D WITH STROKE - 0x0111: 0x00d0, # LATIN SMALL LETTER D WITH STROKE - 0x0118: 0x00a8, # LATIN CAPITAL LETTER E WITH OGONEK - 0x0119: 0x00a9, # LATIN SMALL LETTER E WITH OGONEK - 0x011a: 0x00b7, # LATIN CAPITAL LETTER E WITH CARON - 0x011b: 0x00d8, # LATIN SMALL LETTER E WITH CARON - 0x0139: 0x0091, # LATIN CAPITAL LETTER L WITH ACUTE - 0x013a: 0x0092, # LATIN SMALL LETTER L WITH ACUTE - 0x013d: 0x0095, # LATIN CAPITAL LETTER L WITH CARON - 0x013e: 0x0096, # LATIN SMALL LETTER L WITH CARON - 0x0141: 0x009d, # LATIN CAPITAL LETTER L WITH STROKE - 0x0142: 0x0088, # LATIN SMALL LETTER L WITH STROKE - 0x0143: 0x00e3, # LATIN CAPITAL LETTER N WITH ACUTE - 0x0144: 0x00e4, # LATIN SMALL LETTER N WITH ACUTE - 0x0147: 0x00d5, # LATIN CAPITAL LETTER N WITH CARON - 0x0148: 0x00e5, # LATIN SMALL LETTER N WITH CARON - 0x0150: 0x008a, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x0151: 0x008b, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x0154: 0x00e8, # LATIN CAPITAL LETTER R WITH ACUTE - 0x0155: 0x00ea, # LATIN SMALL LETTER R WITH ACUTE - 0x0158: 0x00fc, # LATIN CAPITAL LETTER R WITH CARON - 0x0159: 0x00fd, # LATIN SMALL LETTER R WITH CARON - 0x015a: 0x0097, # LATIN CAPITAL LETTER S WITH ACUTE - 0x015b: 0x0098, # LATIN SMALL LETTER S WITH ACUTE - 0x015e: 0x00b8, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0x00ad, # LATIN SMALL LETTER S WITH CEDILLA - 0x0160: 0x00e6, # LATIN CAPITAL LETTER S WITH CARON - 0x0161: 0x00e7, # LATIN SMALL LETTER S WITH CARON - 0x0162: 0x00dd, # LATIN CAPITAL LETTER T WITH CEDILLA - 0x0163: 0x00ee, # LATIN SMALL LETTER T WITH CEDILLA - 0x0164: 0x009b, # LATIN CAPITAL LETTER T WITH CARON - 0x0165: 0x009c, # LATIN SMALL LETTER T WITH CARON - 0x016e: 0x00de, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x016f: 0x0085, # LATIN SMALL LETTER U WITH RING ABOVE - 0x0170: 0x00eb, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x0171: 0x00fb, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x0179: 0x008d, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x017a: 0x00ab, # LATIN SMALL LETTER Z WITH ACUTE - 0x017b: 0x00bd, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x017c: 0x00be, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x017d: 0x00a6, # LATIN CAPITAL LETTER Z WITH CARON - 0x017e: 0x00a7, # LATIN SMALL LETTER Z WITH CARON - 0x02c7: 0x00f3, # CARON - 0x02d8: 0x00f4, # BREVE - 0x02d9: 0x00fa, # DOT ABOVE - 0x02db: 0x00f2, # OGONEK - 0x02dd: 0x00f1, # DOUBLE ACUTE ACCENT - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp855.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP855.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp855', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x0452, # CYRILLIC SMALL LETTER DJE - 0x0081: 0x0402, # CYRILLIC CAPITAL LETTER DJE - 0x0082: 0x0453, # CYRILLIC SMALL LETTER GJE - 0x0083: 0x0403, # CYRILLIC CAPITAL LETTER GJE - 0x0084: 0x0451, # CYRILLIC SMALL LETTER IO - 0x0085: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x0086: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0087: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0088: 0x0455, # CYRILLIC SMALL LETTER DZE - 0x0089: 0x0405, # CYRILLIC CAPITAL LETTER DZE - 0x008a: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008b: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x008c: 0x0457, # CYRILLIC SMALL LETTER YI - 0x008d: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x008e: 0x0458, # CYRILLIC SMALL LETTER JE - 0x008f: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x0090: 0x0459, # CYRILLIC SMALL LETTER LJE - 0x0091: 0x0409, # CYRILLIC CAPITAL LETTER LJE - 0x0092: 0x045a, # CYRILLIC SMALL LETTER NJE - 0x0093: 0x040a, # CYRILLIC CAPITAL LETTER NJE - 0x0094: 0x045b, # CYRILLIC SMALL LETTER TSHE - 0x0095: 0x040b, # CYRILLIC CAPITAL LETTER TSHE - 0x0096: 0x045c, # CYRILLIC SMALL LETTER KJE - 0x0097: 0x040c, # CYRILLIC CAPITAL LETTER KJE - 0x0098: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x0099: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x009a: 0x045f, # CYRILLIC SMALL LETTER DZHE - 0x009b: 0x040f, # CYRILLIC CAPITAL LETTER DZHE - 0x009c: 0x044e, # CYRILLIC SMALL LETTER YU - 0x009d: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009e: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x009f: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00a2: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a3: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00a4: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00a5: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00a6: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a7: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00a8: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00a9: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00aa: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00ab: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00ac: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00ad: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00b6: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00b7: 0x0438, # CYRILLIC SMALL LETTER I - 0x00b8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00be: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00c7: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00d1: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00d2: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00d3: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00d4: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00d5: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00d6: 0x043e, # CYRILLIC SMALL LETTER O - 0x00d7: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00d8: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00de: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00e1: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e2: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00e3: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e4: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00e5: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e6: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00e7: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e8: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00e9: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00ea: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00eb: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00ec: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00ed: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ee: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00ef: 0x2116, # NUMERO SIGN - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00f2: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00f3: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00f4: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00f5: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f6: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00f7: 0x044d, # CYRILLIC SMALL LETTER E - 0x00f8: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00fb: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00fc: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00fd: 0x00a7, # SECTION SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0452' # 0x0080 -> CYRILLIC SMALL LETTER DJE - u'\u0402' # 0x0081 -> CYRILLIC CAPITAL LETTER DJE - u'\u0453' # 0x0082 -> CYRILLIC SMALL LETTER GJE - u'\u0403' # 0x0083 -> CYRILLIC CAPITAL LETTER GJE - u'\u0451' # 0x0084 -> CYRILLIC SMALL LETTER IO - u'\u0401' # 0x0085 -> CYRILLIC CAPITAL LETTER IO - u'\u0454' # 0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0404' # 0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0455' # 0x0088 -> CYRILLIC SMALL LETTER DZE - u'\u0405' # 0x0089 -> CYRILLIC CAPITAL LETTER DZE - u'\u0456' # 0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0406' # 0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0457' # 0x008c -> CYRILLIC SMALL LETTER YI - u'\u0407' # 0x008d -> CYRILLIC CAPITAL LETTER YI - u'\u0458' # 0x008e -> CYRILLIC SMALL LETTER JE - u'\u0408' # 0x008f -> CYRILLIC CAPITAL LETTER JE - u'\u0459' # 0x0090 -> CYRILLIC SMALL LETTER LJE - u'\u0409' # 0x0091 -> CYRILLIC CAPITAL LETTER LJE - u'\u045a' # 0x0092 -> CYRILLIC SMALL LETTER NJE - u'\u040a' # 0x0093 -> CYRILLIC CAPITAL LETTER NJE - u'\u045b' # 0x0094 -> CYRILLIC SMALL LETTER TSHE - u'\u040b' # 0x0095 -> CYRILLIC CAPITAL LETTER TSHE - u'\u045c' # 0x0096 -> CYRILLIC SMALL LETTER KJE - u'\u040c' # 0x0097 -> CYRILLIC CAPITAL LETTER KJE - u'\u045e' # 0x0098 -> CYRILLIC SMALL LETTER SHORT U - u'\u040e' # 0x0099 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045f' # 0x009a -> CYRILLIC SMALL LETTER DZHE - u'\u040f' # 0x009b -> CYRILLIC CAPITAL LETTER DZHE - u'\u044e' # 0x009c -> CYRILLIC SMALL LETTER YU - u'\u042e' # 0x009d -> CYRILLIC CAPITAL LETTER YU - u'\u044a' # 0x009e -> CYRILLIC SMALL LETTER HARD SIGN - u'\u042a' # 0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A - u'\u0410' # 0x00a1 -> CYRILLIC CAPITAL LETTER A - u'\u0431' # 0x00a2 -> CYRILLIC SMALL LETTER BE - u'\u0411' # 0x00a3 -> CYRILLIC CAPITAL LETTER BE - u'\u0446' # 0x00a4 -> CYRILLIC SMALL LETTER TSE - u'\u0426' # 0x00a5 -> CYRILLIC CAPITAL LETTER TSE - u'\u0434' # 0x00a6 -> CYRILLIC SMALL LETTER DE - u'\u0414' # 0x00a7 -> CYRILLIC CAPITAL LETTER DE - u'\u0435' # 0x00a8 -> CYRILLIC SMALL LETTER IE - u'\u0415' # 0x00a9 -> CYRILLIC CAPITAL LETTER IE - u'\u0444' # 0x00aa -> CYRILLIC SMALL LETTER EF - u'\u0424' # 0x00ab -> CYRILLIC CAPITAL LETTER EF - u'\u0433' # 0x00ac -> CYRILLIC SMALL LETTER GHE - u'\u0413' # 0x00ad -> CYRILLIC CAPITAL LETTER GHE - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u0445' # 0x00b5 -> CYRILLIC SMALL LETTER HA - u'\u0425' # 0x00b6 -> CYRILLIC CAPITAL LETTER HA - u'\u0438' # 0x00b7 -> CYRILLIC SMALL LETTER I - u'\u0418' # 0x00b8 -> CYRILLIC CAPITAL LETTER I - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u0439' # 0x00bd -> CYRILLIC SMALL LETTER SHORT I - u'\u0419' # 0x00be -> CYRILLIC CAPITAL LETTER SHORT I - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u043a' # 0x00c6 -> CYRILLIC SMALL LETTER KA - u'\u041a' # 0x00c7 -> CYRILLIC CAPITAL LETTER KA - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\u043b' # 0x00d0 -> CYRILLIC SMALL LETTER EL - u'\u041b' # 0x00d1 -> CYRILLIC CAPITAL LETTER EL - u'\u043c' # 0x00d2 -> CYRILLIC SMALL LETTER EM - u'\u041c' # 0x00d3 -> CYRILLIC CAPITAL LETTER EM - u'\u043d' # 0x00d4 -> CYRILLIC SMALL LETTER EN - u'\u041d' # 0x00d5 -> CYRILLIC CAPITAL LETTER EN - u'\u043e' # 0x00d6 -> CYRILLIC SMALL LETTER O - u'\u041e' # 0x00d7 -> CYRILLIC CAPITAL LETTER O - u'\u043f' # 0x00d8 -> CYRILLIC SMALL LETTER PE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u041f' # 0x00dd -> CYRILLIC CAPITAL LETTER PE - u'\u044f' # 0x00de -> CYRILLIC SMALL LETTER YA - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u042f' # 0x00e0 -> CYRILLIC CAPITAL LETTER YA - u'\u0440' # 0x00e1 -> CYRILLIC SMALL LETTER ER - u'\u0420' # 0x00e2 -> CYRILLIC CAPITAL LETTER ER - u'\u0441' # 0x00e3 -> CYRILLIC SMALL LETTER ES - u'\u0421' # 0x00e4 -> CYRILLIC CAPITAL LETTER ES - u'\u0442' # 0x00e5 -> CYRILLIC SMALL LETTER TE - u'\u0422' # 0x00e6 -> CYRILLIC CAPITAL LETTER TE - u'\u0443' # 0x00e7 -> CYRILLIC SMALL LETTER U - u'\u0423' # 0x00e8 -> CYRILLIC CAPITAL LETTER U - u'\u0436' # 0x00e9 -> CYRILLIC SMALL LETTER ZHE - u'\u0416' # 0x00ea -> CYRILLIC CAPITAL LETTER ZHE - u'\u0432' # 0x00eb -> CYRILLIC SMALL LETTER VE - u'\u0412' # 0x00ec -> CYRILLIC CAPITAL LETTER VE - u'\u044c' # 0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u042c' # 0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u2116' # 0x00ef -> NUMERO SIGN - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\u044b' # 0x00f1 -> CYRILLIC SMALL LETTER YERU - u'\u042b' # 0x00f2 -> CYRILLIC CAPITAL LETTER YERU - u'\u0437' # 0x00f3 -> CYRILLIC SMALL LETTER ZE - u'\u0417' # 0x00f4 -> CYRILLIC CAPITAL LETTER ZE - u'\u0448' # 0x00f5 -> CYRILLIC SMALL LETTER SHA - u'\u0428' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHA - u'\u044d' # 0x00f7 -> CYRILLIC SMALL LETTER E - u'\u042d' # 0x00f8 -> CYRILLIC CAPITAL LETTER E - u'\u0449' # 0x00f9 -> CYRILLIC SMALL LETTER SHCHA - u'\u0429' # 0x00fa -> CYRILLIC CAPITAL LETTER SHCHA - u'\u0447' # 0x00fb -> CYRILLIC SMALL LETTER CHE - u'\u0427' # 0x00fc -> CYRILLIC CAPITAL LETTER CHE - u'\xa7' # 0x00fd -> SECTION SIGN - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a7: 0x00fd, # SECTION SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x0401: 0x0085, # CYRILLIC CAPITAL LETTER IO - 0x0402: 0x0081, # CYRILLIC CAPITAL LETTER DJE - 0x0403: 0x0083, # CYRILLIC CAPITAL LETTER GJE - 0x0404: 0x0087, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0405: 0x0089, # CYRILLIC CAPITAL LETTER DZE - 0x0406: 0x008b, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0407: 0x008d, # CYRILLIC CAPITAL LETTER YI - 0x0408: 0x008f, # CYRILLIC CAPITAL LETTER JE - 0x0409: 0x0091, # CYRILLIC CAPITAL LETTER LJE - 0x040a: 0x0093, # CYRILLIC CAPITAL LETTER NJE - 0x040b: 0x0095, # CYRILLIC CAPITAL LETTER TSHE - 0x040c: 0x0097, # CYRILLIC CAPITAL LETTER KJE - 0x040e: 0x0099, # CYRILLIC CAPITAL LETTER SHORT U - 0x040f: 0x009b, # CYRILLIC CAPITAL LETTER DZHE - 0x0410: 0x00a1, # CYRILLIC CAPITAL LETTER A - 0x0411: 0x00a3, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0x00ec, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0x00ad, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0x00a7, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0x00a9, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0x00ea, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0x00f4, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0x00b8, # CYRILLIC CAPITAL LETTER I - 0x0419: 0x00be, # CYRILLIC CAPITAL LETTER SHORT I - 0x041a: 0x00c7, # CYRILLIC CAPITAL LETTER KA - 0x041b: 0x00d1, # CYRILLIC CAPITAL LETTER EL - 0x041c: 0x00d3, # CYRILLIC CAPITAL LETTER EM - 0x041d: 0x00d5, # CYRILLIC CAPITAL LETTER EN - 0x041e: 0x00d7, # CYRILLIC CAPITAL LETTER O - 0x041f: 0x00dd, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0x00e2, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0x00e4, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0x00e6, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0x00e8, # CYRILLIC CAPITAL LETTER U - 0x0424: 0x00ab, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0x00b6, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0x00a5, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0x00fc, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0x00f6, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0x00fa, # CYRILLIC CAPITAL LETTER SHCHA - 0x042a: 0x009f, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042b: 0x00f2, # CYRILLIC CAPITAL LETTER YERU - 0x042c: 0x00ee, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042d: 0x00f8, # CYRILLIC CAPITAL LETTER E - 0x042e: 0x009d, # CYRILLIC CAPITAL LETTER YU - 0x042f: 0x00e0, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A - 0x0431: 0x00a2, # CYRILLIC SMALL LETTER BE - 0x0432: 0x00eb, # CYRILLIC SMALL LETTER VE - 0x0433: 0x00ac, # CYRILLIC SMALL LETTER GHE - 0x0434: 0x00a6, # CYRILLIC SMALL LETTER DE - 0x0435: 0x00a8, # CYRILLIC SMALL LETTER IE - 0x0436: 0x00e9, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0x00f3, # CYRILLIC SMALL LETTER ZE - 0x0438: 0x00b7, # CYRILLIC SMALL LETTER I - 0x0439: 0x00bd, # CYRILLIC SMALL LETTER SHORT I - 0x043a: 0x00c6, # CYRILLIC SMALL LETTER KA - 0x043b: 0x00d0, # CYRILLIC SMALL LETTER EL - 0x043c: 0x00d2, # CYRILLIC SMALL LETTER EM - 0x043d: 0x00d4, # CYRILLIC SMALL LETTER EN - 0x043e: 0x00d6, # CYRILLIC SMALL LETTER O - 0x043f: 0x00d8, # CYRILLIC SMALL LETTER PE - 0x0440: 0x00e1, # CYRILLIC SMALL LETTER ER - 0x0441: 0x00e3, # CYRILLIC SMALL LETTER ES - 0x0442: 0x00e5, # CYRILLIC SMALL LETTER TE - 0x0443: 0x00e7, # CYRILLIC SMALL LETTER U - 0x0444: 0x00aa, # CYRILLIC SMALL LETTER EF - 0x0445: 0x00b5, # CYRILLIC SMALL LETTER HA - 0x0446: 0x00a4, # CYRILLIC SMALL LETTER TSE - 0x0447: 0x00fb, # CYRILLIC SMALL LETTER CHE - 0x0448: 0x00f5, # CYRILLIC SMALL LETTER SHA - 0x0449: 0x00f9, # CYRILLIC SMALL LETTER SHCHA - 0x044a: 0x009e, # CYRILLIC SMALL LETTER HARD SIGN - 0x044b: 0x00f1, # CYRILLIC SMALL LETTER YERU - 0x044c: 0x00ed, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044d: 0x00f7, # CYRILLIC SMALL LETTER E - 0x044e: 0x009c, # CYRILLIC SMALL LETTER YU - 0x044f: 0x00de, # CYRILLIC SMALL LETTER YA - 0x0451: 0x0084, # CYRILLIC SMALL LETTER IO - 0x0452: 0x0080, # CYRILLIC SMALL LETTER DJE - 0x0453: 0x0082, # CYRILLIC SMALL LETTER GJE - 0x0454: 0x0086, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0455: 0x0088, # CYRILLIC SMALL LETTER DZE - 0x0456: 0x008a, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x0457: 0x008c, # CYRILLIC SMALL LETTER YI - 0x0458: 0x008e, # CYRILLIC SMALL LETTER JE - 0x0459: 0x0090, # CYRILLIC SMALL LETTER LJE - 0x045a: 0x0092, # CYRILLIC SMALL LETTER NJE - 0x045b: 0x0094, # CYRILLIC SMALL LETTER TSHE - 0x045c: 0x0096, # CYRILLIC SMALL LETTER KJE - 0x045e: 0x0098, # CYRILLIC SMALL LETTER SHORT U - 0x045f: 0x009a, # CYRILLIC SMALL LETTER DZHE - 0x2116: 0x00ef, # NUMERO SIGN - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp856.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp856 generated from 'MAPPINGS/VENDORS/MISC/CP856.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp856', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u05d0' # 0x80 -> HEBREW LETTER ALEF - u'\u05d1' # 0x81 -> HEBREW LETTER BET - u'\u05d2' # 0x82 -> HEBREW LETTER GIMEL - u'\u05d3' # 0x83 -> HEBREW LETTER DALET - u'\u05d4' # 0x84 -> HEBREW LETTER HE - u'\u05d5' # 0x85 -> HEBREW LETTER VAV - u'\u05d6' # 0x86 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0x87 -> HEBREW LETTER HET - u'\u05d8' # 0x88 -> HEBREW LETTER TET - u'\u05d9' # 0x89 -> HEBREW LETTER YOD - u'\u05da' # 0x8A -> HEBREW LETTER FINAL KAF - u'\u05db' # 0x8B -> HEBREW LETTER KAF - u'\u05dc' # 0x8C -> HEBREW LETTER LAMED - u'\u05dd' # 0x8D -> HEBREW LETTER FINAL MEM - u'\u05de' # 0x8E -> HEBREW LETTER MEM - u'\u05df' # 0x8F -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0x90 -> HEBREW LETTER NUN - u'\u05e1' # 0x91 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0x92 -> HEBREW LETTER AYIN - u'\u05e3' # 0x93 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0x94 -> HEBREW LETTER PE - u'\u05e5' # 0x95 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0x96 -> HEBREW LETTER TSADI - u'\u05e7' # 0x97 -> HEBREW LETTER QOF - u'\u05e8' # 0x98 -> HEBREW LETTER RESH - u'\u05e9' # 0x99 -> HEBREW LETTER SHIN - u'\u05ea' # 0x9A -> HEBREW LETTER TAV - u'\ufffe' # 0x9B -> UNDEFINED - u'\xa3' # 0x9C -> POUND SIGN - u'\ufffe' # 0x9D -> UNDEFINED - u'\xd7' # 0x9E -> MULTIPLICATION SIGN - u'\ufffe' # 0x9F -> UNDEFINED - u'\ufffe' # 0xA0 -> UNDEFINED - u'\ufffe' # 0xA1 -> UNDEFINED - u'\ufffe' # 0xA2 -> UNDEFINED - u'\ufffe' # 0xA3 -> UNDEFINED - u'\ufffe' # 0xA4 -> UNDEFINED - u'\ufffe' # 0xA5 -> UNDEFINED - u'\ufffe' # 0xA6 -> UNDEFINED - u'\ufffe' # 0xA7 -> UNDEFINED - u'\ufffe' # 0xA8 -> UNDEFINED - u'\xae' # 0xA9 -> REGISTERED SIGN - u'\xac' # 0xAA -> NOT SIGN - u'\xbd' # 0xAB -> VULGAR FRACTION ONE HALF - u'\xbc' # 0xAC -> VULGAR FRACTION ONE QUARTER - u'\ufffe' # 0xAD -> UNDEFINED - u'\xab' # 0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0xB0 -> LIGHT SHADE - u'\u2592' # 0xB1 -> MEDIUM SHADE - u'\u2593' # 0xB2 -> DARK SHADE - u'\u2502' # 0xB3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\ufffe' # 0xB5 -> UNDEFINED - u'\ufffe' # 0xB6 -> UNDEFINED - u'\ufffe' # 0xB7 -> UNDEFINED - u'\xa9' # 0xB8 -> COPYRIGHT SIGN - u'\u2563' # 0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0xBA -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0xBD -> CENT SIGN - u'\xa5' # 0xBE -> YEN SIGN - u'\u2510' # 0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\ufffe' # 0xC6 -> UNDEFINED - u'\ufffe' # 0xC7 -> UNDEFINED - u'\u255a' # 0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0xCF -> CURRENCY SIGN - u'\ufffe' # 0xD0 -> UNDEFINED - u'\ufffe' # 0xD1 -> UNDEFINED - u'\ufffe' # 0xD2 -> UNDEFINED - u'\ufffe' # 0xD3 -> UNDEFINEDS - u'\ufffe' # 0xD4 -> UNDEFINED - u'\ufffe' # 0xD5 -> UNDEFINED - u'\ufffe' # 0xD6 -> UNDEFINEDE - u'\ufffe' # 0xD7 -> UNDEFINED - u'\ufffe' # 0xD8 -> UNDEFINED - u'\u2518' # 0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0xDB -> FULL BLOCK - u'\u2584' # 0xDC -> LOWER HALF BLOCK - u'\xa6' # 0xDD -> BROKEN BAR - u'\ufffe' # 0xDE -> UNDEFINED - u'\u2580' # 0xDF -> UPPER HALF BLOCK - u'\ufffe' # 0xE0 -> UNDEFINED - u'\ufffe' # 0xE1 -> UNDEFINED - u'\ufffe' # 0xE2 -> UNDEFINED - u'\ufffe' # 0xE3 -> UNDEFINED - u'\ufffe' # 0xE4 -> UNDEFINED - u'\ufffe' # 0xE5 -> UNDEFINED - u'\xb5' # 0xE6 -> MICRO SIGN - u'\ufffe' # 0xE7 -> UNDEFINED - u'\ufffe' # 0xE8 -> UNDEFINED - u'\ufffe' # 0xE9 -> UNDEFINED - u'\ufffe' # 0xEA -> UNDEFINED - u'\ufffe' # 0xEB -> UNDEFINED - u'\ufffe' # 0xEC -> UNDEFINED - u'\ufffe' # 0xED -> UNDEFINED - u'\xaf' # 0xEE -> MACRON - u'\xb4' # 0xEF -> ACUTE ACCENT - u'\xad' # 0xF0 -> SOFT HYPHEN - u'\xb1' # 0xF1 -> PLUS-MINUS SIGN - u'\u2017' # 0xF2 -> DOUBLE LOW LINE - u'\xbe' # 0xF3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0xF4 -> PILCROW SIGN - u'\xa7' # 0xF5 -> SECTION SIGN - u'\xf7' # 0xF6 -> DIVISION SIGN - u'\xb8' # 0xF7 -> CEDILLA - u'\xb0' # 0xF8 -> DEGREE SIGN - u'\xa8' # 0xF9 -> DIAERESIS - u'\xb7' # 0xFA -> MIDDLE DOT - u'\xb9' # 0xFB -> SUPERSCRIPT ONE - u'\xb3' # 0xFC -> SUPERSCRIPT THREE - u'\xb2' # 0xFD -> SUPERSCRIPT TWO - u'\u25a0' # 0xFE -> BLACK SQUARE - u'\xa0' # 0xFF -> NO-BREAK SPACE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp857.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,694 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP857.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp857', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x0131, # LATIN SMALL LETTER DOTLESS I - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x0130, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x015e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x009f: 0x015f, # LATIN SMALL LETTER S WITH CEDILLA - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x011e, # LATIN CAPITAL LETTER G WITH BREVE - 0x00a7: 0x011f, # LATIN SMALL LETTER G WITH BREVE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00d1: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: None, # UNDEFINED - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: None, # UNDEFINED - 0x00e8: 0x00d7, # MULTIPLICATION SIGN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ed: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: None, # UNDEFINED - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u0131' # 0x008d -> LATIN SMALL LETTER DOTLESS I - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\u0130' # 0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\u015e' # 0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u015f' # 0x009f -> LATIN SMALL LETTER S WITH CEDILLA - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\u011e' # 0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE - u'\u011f' # 0x00a7 -> LATIN SMALL LETTER G WITH BREVE - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\xae' # 0x00a9 -> REGISTERED SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xa9' # 0x00b8 -> COPYRIGHT SIGN - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0x00bd -> CENT SIGN - u'\xa5' # 0x00be -> YEN SIGN - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE - u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\xba' # 0x00d0 -> MASCULINE ORDINAL INDICATOR - u'\xaa' # 0x00d1 -> FEMININE ORDINAL INDICATOR - u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\ufffe' # 0x00d5 -> UNDEFINED - u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\xa6' # 0x00dd -> BROKEN BAR - u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE - u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\ufffe' # 0x00e7 -> UNDEFINED - u'\xd7' # 0x00e8 -> MULTIPLICATION SIGN - u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xec' # 0x00ec -> LATIN SMALL LETTER I WITH GRAVE - u'\xff' # 0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xaf' # 0x00ee -> MACRON - u'\xb4' # 0x00ef -> ACUTE ACCENT - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\ufffe' # 0x00f2 -> UNDEFINED - u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0x00f4 -> PILCROW SIGN - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\xb8' # 0x00f7 -> CEDILLA - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\xb9' # 0x00fb -> SUPERSCRIPT ONE - u'\xb3' # 0x00fc -> SUPERSCRIPT THREE - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x00bd, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a5: 0x00be, # YEN SIGN - 0x00a6: 0x00dd, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00a9: 0x00b8, # COPYRIGHT SIGN - 0x00aa: 0x00d1, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00ae: 0x00a9, # REGISTERED SIGN - 0x00af: 0x00ee, # MACRON - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00fc, # SUPERSCRIPT THREE - 0x00b4: 0x00ef, # ACUTE ACCENT - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x00f4, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b8: 0x00f7, # CEDILLA - 0x00b9: 0x00fb, # SUPERSCRIPT ONE - 0x00ba: 0x00d0, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x00e8, # MULTIPLICATION SIGN - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0x00ed, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x011e: 0x00a6, # LATIN CAPITAL LETTER G WITH BREVE - 0x011f: 0x00a7, # LATIN SMALL LETTER G WITH BREVE - 0x0130: 0x0098, # LATIN CAPITAL LETTER I WITH DOT ABOVE - 0x0131: 0x008d, # LATIN SMALL LETTER DOTLESS I - 0x015e: 0x009e, # LATIN CAPITAL LETTER S WITH CEDILLA - 0x015f: 0x009f, # LATIN SMALL LETTER S WITH CEDILLA - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp858.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec for CP858, modified from cp850. - -""" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp858', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x00d7, # MULTIPLICATION SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00b6: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00b7: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00b8: 0x00a9, # COPYRIGHT SIGN - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x00a2, # CENT SIGN - 0x00be: 0x00a5, # YEN SIGN - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00c7: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x00a4, # CURRENCY SIGN - 0x00d0: 0x00f0, # LATIN SMALL LETTER ETH - 0x00d1: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x00d2: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00d3: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00d4: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00d5: 0x20ac, # EURO SIGN - 0x00d6: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d7: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00d8: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x00a6, # BROKEN BAR - 0x00de: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e3: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e4: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00e5: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x00fe, # LATIN SMALL LETTER THORN - 0x00e8: 0x00de, # LATIN CAPITAL LETTER THORN - 0x00e9: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ea: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00eb: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ec: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00ed: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00ee: 0x00af, # MACRON - 0x00ef: 0x00b4, # ACUTE ACCENT - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2017, # DOUBLE LOW LINE - 0x00f3: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x00b8, # CEDILLA - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x00b9, # SUPERSCRIPT ONE - 0x00fc: 0x00b3, # SUPERSCRIPT THREE - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd7' # 0x009e -> MULTIPLICATION SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\xae' # 0x00a9 -> REGISTERED SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\xc1' # 0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc0' # 0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xa9' # 0x00b8 -> COPYRIGHT SIGN - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\xa2' # 0x00bd -> CENT SIGN - u'\xa5' # 0x00be -> YEN SIGN - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\xe3' # 0x00c6 -> LATIN SMALL LETTER A WITH TILDE - u'\xc3' # 0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa4' # 0x00cf -> CURRENCY SIGN - u'\xf0' # 0x00d0 -> LATIN SMALL LETTER ETH - u'\xd0' # 0x00d1 -> LATIN CAPITAL LETTER ETH - u'\xca' # 0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\u20ac' # 0x00d5 -> EURO SIGN - u'\xcd' # 0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\xa6' # 0x00dd -> BROKEN BAR - u'\xcc' # 0x00de -> LATIN CAPITAL LETTER I WITH GRAVE - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\xd3' # 0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\xd4' # 0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd2' # 0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xf5' # 0x00e4 -> LATIN SMALL LETTER O WITH TILDE - u'\xd5' # 0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\xfe' # 0x00e7 -> LATIN SMALL LETTER THORN - u'\xde' # 0x00e8 -> LATIN CAPITAL LETTER THORN - u'\xda' # 0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xfd' # 0x00ec -> LATIN SMALL LETTER Y WITH ACUTE - u'\xdd' # 0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xaf' # 0x00ee -> MACRON - u'\xb4' # 0x00ef -> ACUTE ACCENT - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2017' # 0x00f2 -> DOUBLE LOW LINE - u'\xbe' # 0x00f3 -> VULGAR FRACTION THREE QUARTERS - u'\xb6' # 0x00f4 -> PILCROW SIGN - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\xb8' # 0x00f7 -> CEDILLA - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\xb9' # 0x00fb -> SUPERSCRIPT ONE - u'\xb3' # 0x00fc -> SUPERSCRIPT THREE - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x00bd, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x00cf, # CURRENCY SIGN - 0x00a5: 0x00be, # YEN SIGN - 0x00a6: 0x00dd, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00a9: 0x00b8, # COPYRIGHT SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00ae: 0x00a9, # REGISTERED SIGN - 0x00af: 0x00ee, # MACRON - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00fc, # SUPERSCRIPT THREE - 0x00b4: 0x00ef, # ACUTE ACCENT - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x00f4, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b8: 0x00f7, # CEDILLA - 0x00b9: 0x00fb, # SUPERSCRIPT ONE - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00f3, # VULGAR FRACTION THREE QUARTERS - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c0: 0x00b7, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x00b5, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x00b6, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x00c7, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x00d4, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x00d2, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x00d3, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00cc: 0x00de, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x00d6, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00ce: 0x00d7, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x00d8, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d0: 0x00d1, # LATIN CAPITAL LETTER ETH - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0x00e3, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0x00e0, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x00e2, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0x00e5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d7: 0x009e, # MULTIPLICATION SIGN - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d9: 0x00eb, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0x00e9, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00db: 0x00ea, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0x00ed, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0x00e8, # LATIN CAPITAL LETTER THORN - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x00c6, # LATIN SMALL LETTER A WITH TILDE - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f0: 0x00d0, # LATIN SMALL LETTER ETH - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x00e4, # LATIN SMALL LETTER O WITH TILDE - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x00ec, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0x00e7, # LATIN SMALL LETTER THORN - 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x20ac: 0x00d5, # EURO SIGN - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x2017: 0x00f2, # DOUBLE LOW LINE - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp860.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP860.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp860', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x008c: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x008f: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x0092: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x0099: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0x0084 -> LATIN SMALL LETTER A WITH TILDE - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xc1' # 0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xca' # 0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xcd' # 0x008b -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xd4' # 0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc3' # 0x008e -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc2' # 0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xc0' # 0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc8' # 0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0x0094 -> LATIN SMALL LETTER O WITH TILDE - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xda' # 0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xcc' # 0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd5' # 0x0099 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u20a7' # 0x009e -> PESETA SIGN - u'\xd3' # 0x009f -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\xd2' # 0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c0: 0x0091, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c1: 0x0086, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c2: 0x008f, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c3: 0x008e, # LATIN CAPITAL LETTER A WITH TILDE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x0092, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x0089, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cc: 0x0098, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00cd: 0x008b, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d2: 0x00a9, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00d3: 0x009f, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d4: 0x008c, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d5: 0x0099, # LATIN CAPITAL LETTER O WITH TILDE - 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00da: 0x0096, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e3: 0x0084, # LATIN SMALL LETTER A WITH TILDE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f5: 0x0094, # LATIN SMALL LETTER O WITH TILDE - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp861.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP861.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp861', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00d0, # LATIN CAPITAL LETTER ETH - 0x008c: 0x00f0, # LATIN SMALL LETTER ETH - 0x008d: 0x00de, # LATIN CAPITAL LETTER THORN - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00fe, # LATIN SMALL LETTER THORN - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x0098: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00a5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00a6: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00a7: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xd0' # 0x008b -> LATIN CAPITAL LETTER ETH - u'\xf0' # 0x008c -> LATIN SMALL LETTER ETH - u'\xde' # 0x008d -> LATIN CAPITAL LETTER THORN - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xfe' # 0x0095 -> LATIN SMALL LETTER THORN - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xdd' # 0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xfd' # 0x0098 -> LATIN SMALL LETTER Y WITH ACUTE - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xc1' # 0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcd' # 0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xd3' # 0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xda' # 0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a3: 0x009c, # POUND SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c1: 0x00a4, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00cd: 0x00a5, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00d0: 0x008b, # LATIN CAPITAL LETTER ETH - 0x00d3: 0x00a6, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00da: 0x00a7, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dd: 0x0097, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00de: 0x008d, # LATIN CAPITAL LETTER THORN - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00f0: 0x008c, # LATIN SMALL LETTER ETH - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00fd: 0x0098, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fe: 0x0095, # LATIN SMALL LETTER THORN - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp862.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP862.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp862', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x05d0, # HEBREW LETTER ALEF - 0x0081: 0x05d1, # HEBREW LETTER BET - 0x0082: 0x05d2, # HEBREW LETTER GIMEL - 0x0083: 0x05d3, # HEBREW LETTER DALET - 0x0084: 0x05d4, # HEBREW LETTER HE - 0x0085: 0x05d5, # HEBREW LETTER VAV - 0x0086: 0x05d6, # HEBREW LETTER ZAYIN - 0x0087: 0x05d7, # HEBREW LETTER HET - 0x0088: 0x05d8, # HEBREW LETTER TET - 0x0089: 0x05d9, # HEBREW LETTER YOD - 0x008a: 0x05da, # HEBREW LETTER FINAL KAF - 0x008b: 0x05db, # HEBREW LETTER KAF - 0x008c: 0x05dc, # HEBREW LETTER LAMED - 0x008d: 0x05dd, # HEBREW LETTER FINAL MEM - 0x008e: 0x05de, # HEBREW LETTER MEM - 0x008f: 0x05df, # HEBREW LETTER FINAL NUN - 0x0090: 0x05e0, # HEBREW LETTER NUN - 0x0091: 0x05e1, # HEBREW LETTER SAMEKH - 0x0092: 0x05e2, # HEBREW LETTER AYIN - 0x0093: 0x05e3, # HEBREW LETTER FINAL PE - 0x0094: 0x05e4, # HEBREW LETTER PE - 0x0095: 0x05e5, # HEBREW LETTER FINAL TSADI - 0x0096: 0x05e6, # HEBREW LETTER TSADI - 0x0097: 0x05e7, # HEBREW LETTER QOF - 0x0098: 0x05e8, # HEBREW LETTER RESH - 0x0099: 0x05e9, # HEBREW LETTER SHIN - 0x009a: 0x05ea, # HEBREW LETTER TAV - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00a5, # YEN SIGN - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u05d0' # 0x0080 -> HEBREW LETTER ALEF - u'\u05d1' # 0x0081 -> HEBREW LETTER BET - u'\u05d2' # 0x0082 -> HEBREW LETTER GIMEL - u'\u05d3' # 0x0083 -> HEBREW LETTER DALET - u'\u05d4' # 0x0084 -> HEBREW LETTER HE - u'\u05d5' # 0x0085 -> HEBREW LETTER VAV - u'\u05d6' # 0x0086 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0x0087 -> HEBREW LETTER HET - u'\u05d8' # 0x0088 -> HEBREW LETTER TET - u'\u05d9' # 0x0089 -> HEBREW LETTER YOD - u'\u05da' # 0x008a -> HEBREW LETTER FINAL KAF - u'\u05db' # 0x008b -> HEBREW LETTER KAF - u'\u05dc' # 0x008c -> HEBREW LETTER LAMED - u'\u05dd' # 0x008d -> HEBREW LETTER FINAL MEM - u'\u05de' # 0x008e -> HEBREW LETTER MEM - u'\u05df' # 0x008f -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0x0090 -> HEBREW LETTER NUN - u'\u05e1' # 0x0091 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0x0092 -> HEBREW LETTER AYIN - u'\u05e3' # 0x0093 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0x0094 -> HEBREW LETTER PE - u'\u05e5' # 0x0095 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0x0096 -> HEBREW LETTER TSADI - u'\u05e7' # 0x0097 -> HEBREW LETTER QOF - u'\u05e8' # 0x0098 -> HEBREW LETTER RESH - u'\u05e9' # 0x0099 -> HEBREW LETTER SHIN - u'\u05ea' # 0x009a -> HEBREW LETTER TAV - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xa5' # 0x009d -> YEN SIGN - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN) - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a5: 0x009d, # YEN SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S (GERMAN) - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x05d0: 0x0080, # HEBREW LETTER ALEF - 0x05d1: 0x0081, # HEBREW LETTER BET - 0x05d2: 0x0082, # HEBREW LETTER GIMEL - 0x05d3: 0x0083, # HEBREW LETTER DALET - 0x05d4: 0x0084, # HEBREW LETTER HE - 0x05d5: 0x0085, # HEBREW LETTER VAV - 0x05d6: 0x0086, # HEBREW LETTER ZAYIN - 0x05d7: 0x0087, # HEBREW LETTER HET - 0x05d8: 0x0088, # HEBREW LETTER TET - 0x05d9: 0x0089, # HEBREW LETTER YOD - 0x05da: 0x008a, # HEBREW LETTER FINAL KAF - 0x05db: 0x008b, # HEBREW LETTER KAF - 0x05dc: 0x008c, # HEBREW LETTER LAMED - 0x05dd: 0x008d, # HEBREW LETTER FINAL MEM - 0x05de: 0x008e, # HEBREW LETTER MEM - 0x05df: 0x008f, # HEBREW LETTER FINAL NUN - 0x05e0: 0x0090, # HEBREW LETTER NUN - 0x05e1: 0x0091, # HEBREW LETTER SAMEKH - 0x05e2: 0x0092, # HEBREW LETTER AYIN - 0x05e3: 0x0093, # HEBREW LETTER FINAL PE - 0x05e4: 0x0094, # HEBREW LETTER PE - 0x05e5: 0x0095, # HEBREW LETTER FINAL TSADI - 0x05e6: 0x0096, # HEBREW LETTER TSADI - 0x05e7: 0x0097, # HEBREW LETTER QOF - 0x05e8: 0x0098, # HEBREW LETTER RESH - 0x05e9: 0x0099, # HEBREW LETTER SHIN - 0x05ea: 0x009a, # HEBREW LETTER TAV - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp863.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP863.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp863', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00b6, # PILCROW SIGN - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x2017, # DOUBLE LOW LINE - 0x008e: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x008f: 0x00a7, # SECTION SIGN - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x0092: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x0095: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00a4, # CURRENCY SIGN - 0x0099: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00a2, # CENT SIGN - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x009e: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00a6, # BROKEN BAR - 0x00a1: 0x00b4, # ACUTE ACCENT - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00a8, # DIAERESIS - 0x00a5: 0x00b8, # CEDILLA - 0x00a6: 0x00b3, # SUPERSCRIPT THREE - 0x00a7: 0x00af, # MACRON - 0x00a8: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xc2' # 0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xb6' # 0x0086 -> PILCROW SIGN - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u2017' # 0x008d -> DOUBLE LOW LINE - u'\xc0' # 0x008e -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xa7' # 0x008f -> SECTION SIGN - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xc8' # 0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xca' # 0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xcb' # 0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcf' # 0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xa4' # 0x0098 -> CURRENCY SIGN - u'\xd4' # 0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xa2' # 0x009b -> CENT SIGN - u'\xa3' # 0x009c -> POUND SIGN - u'\xd9' # 0x009d -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xdb' # 0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xa6' # 0x00a0 -> BROKEN BAR - u'\xb4' # 0x00a1 -> ACUTE ACCENT - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xa8' # 0x00a4 -> DIAERESIS - u'\xb8' # 0x00a5 -> CEDILLA - u'\xb3' # 0x00a6 -> SUPERSCRIPT THREE - u'\xaf' # 0x00a7 -> MACRON - u'\xce' # 0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xbe' # 0x00ad -> VULGAR FRACTION THREE QUARTERS - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a2: 0x009b, # CENT SIGN - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x0098, # CURRENCY SIGN - 0x00a6: 0x00a0, # BROKEN BAR - 0x00a7: 0x008f, # SECTION SIGN - 0x00a8: 0x00a4, # DIAERESIS - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00af: 0x00a7, # MACRON - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b3: 0x00a6, # SUPERSCRIPT THREE - 0x00b4: 0x00a1, # ACUTE ACCENT - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b6: 0x0086, # PILCROW SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00b8: 0x00a5, # CEDILLA - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00be: 0x00ad, # VULGAR FRACTION THREE QUARTERS - 0x00c0: 0x008e, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00c2: 0x0084, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c8: 0x0091, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00ca: 0x0092, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00cb: 0x0094, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00ce: 0x00a8, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00cf: 0x0095, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00d4: 0x0099, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00d9: 0x009d, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00db: 0x009e, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x2017: 0x008d, # DOUBLE LOW LINE - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp864.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,690 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP864.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp864', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0025: 0x066a, # ARABIC PERCENT SIGN - 0x0080: 0x00b0, # DEGREE SIGN - 0x0081: 0x00b7, # MIDDLE DOT - 0x0082: 0x2219, # BULLET OPERATOR - 0x0083: 0x221a, # SQUARE ROOT - 0x0084: 0x2592, # MEDIUM SHADE - 0x0085: 0x2500, # FORMS LIGHT HORIZONTAL - 0x0086: 0x2502, # FORMS LIGHT VERTICAL - 0x0087: 0x253c, # FORMS LIGHT VERTICAL AND HORIZONTAL - 0x0088: 0x2524, # FORMS LIGHT VERTICAL AND LEFT - 0x0089: 0x252c, # FORMS LIGHT DOWN AND HORIZONTAL - 0x008a: 0x251c, # FORMS LIGHT VERTICAL AND RIGHT - 0x008b: 0x2534, # FORMS LIGHT UP AND HORIZONTAL - 0x008c: 0x2510, # FORMS LIGHT DOWN AND LEFT - 0x008d: 0x250c, # FORMS LIGHT DOWN AND RIGHT - 0x008e: 0x2514, # FORMS LIGHT UP AND RIGHT - 0x008f: 0x2518, # FORMS LIGHT UP AND LEFT - 0x0090: 0x03b2, # GREEK SMALL BETA - 0x0091: 0x221e, # INFINITY - 0x0092: 0x03c6, # GREEK SMALL PHI - 0x0093: 0x00b1, # PLUS-OR-MINUS SIGN - 0x0094: 0x00bd, # FRACTION 1/2 - 0x0095: 0x00bc, # FRACTION 1/4 - 0x0096: 0x2248, # ALMOST EQUAL TO - 0x0097: 0x00ab, # LEFT POINTING GUILLEMET - 0x0098: 0x00bb, # RIGHT POINTING GUILLEMET - 0x0099: 0xfef7, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x009a: 0xfef8, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - 0x009b: None, # UNDEFINED - 0x009c: None, # UNDEFINED - 0x009d: 0xfefb, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - 0x009e: 0xfefc, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM - 0x009f: None, # UNDEFINED - 0x00a1: 0x00ad, # SOFT HYPHEN - 0x00a2: 0xfe82, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - 0x00a5: 0xfe84, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - 0x00a6: None, # UNDEFINED - 0x00a7: None, # UNDEFINED - 0x00a8: 0xfe8e, # ARABIC LETTER ALEF FINAL FORM - 0x00a9: 0xfe8f, # ARABIC LETTER BEH ISOLATED FORM - 0x00aa: 0xfe95, # ARABIC LETTER TEH ISOLATED FORM - 0x00ab: 0xfe99, # ARABIC LETTER THEH ISOLATED FORM - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ad: 0xfe9d, # ARABIC LETTER JEEM ISOLATED FORM - 0x00ae: 0xfea1, # ARABIC LETTER HAH ISOLATED FORM - 0x00af: 0xfea5, # ARABIC LETTER KHAH ISOLATED FORM - 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO - 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE - 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO - 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE - 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR - 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE - 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX - 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN - 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT - 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE - 0x00ba: 0xfed1, # ARABIC LETTER FEH ISOLATED FORM - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: 0xfeb1, # ARABIC LETTER SEEN ISOLATED FORM - 0x00bd: 0xfeb5, # ARABIC LETTER SHEEN ISOLATED FORM - 0x00be: 0xfeb9, # ARABIC LETTER SAD ISOLATED FORM - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: 0x00a2, # CENT SIGN - 0x00c1: 0xfe80, # ARABIC LETTER HAMZA ISOLATED FORM - 0x00c2: 0xfe81, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00c3: 0xfe83, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - 0x00c4: 0xfe85, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0x00c5: 0xfeca, # ARABIC LETTER AIN FINAL FORM - 0x00c6: 0xfe8b, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0x00c7: 0xfe8d, # ARABIC LETTER ALEF ISOLATED FORM - 0x00c8: 0xfe91, # ARABIC LETTER BEH INITIAL FORM - 0x00c9: 0xfe93, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0x00ca: 0xfe97, # ARABIC LETTER TEH INITIAL FORM - 0x00cb: 0xfe9b, # ARABIC LETTER THEH INITIAL FORM - 0x00cc: 0xfe9f, # ARABIC LETTER JEEM INITIAL FORM - 0x00cd: 0xfea3, # ARABIC LETTER HAH INITIAL FORM - 0x00ce: 0xfea7, # ARABIC LETTER KHAH INITIAL FORM - 0x00cf: 0xfea9, # ARABIC LETTER DAL ISOLATED FORM - 0x00d0: 0xfeab, # ARABIC LETTER THAL ISOLATED FORM - 0x00d1: 0xfead, # ARABIC LETTER REH ISOLATED FORM - 0x00d2: 0xfeaf, # ARABIC LETTER ZAIN ISOLATED FORM - 0x00d3: 0xfeb3, # ARABIC LETTER SEEN INITIAL FORM - 0x00d4: 0xfeb7, # ARABIC LETTER SHEEN INITIAL FORM - 0x00d5: 0xfebb, # ARABIC LETTER SAD INITIAL FORM - 0x00d6: 0xfebf, # ARABIC LETTER DAD INITIAL FORM - 0x00d7: 0xfec1, # ARABIC LETTER TAH ISOLATED FORM - 0x00d8: 0xfec5, # ARABIC LETTER ZAH ISOLATED FORM - 0x00d9: 0xfecb, # ARABIC LETTER AIN INITIAL FORM - 0x00da: 0xfecf, # ARABIC LETTER GHAIN INITIAL FORM - 0x00db: 0x00a6, # BROKEN VERTICAL BAR - 0x00dc: 0x00ac, # NOT SIGN - 0x00dd: 0x00f7, # DIVISION SIGN - 0x00de: 0x00d7, # MULTIPLICATION SIGN - 0x00df: 0xfec9, # ARABIC LETTER AIN ISOLATED FORM - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0xfed3, # ARABIC LETTER FEH INITIAL FORM - 0x00e2: 0xfed7, # ARABIC LETTER QAF INITIAL FORM - 0x00e3: 0xfedb, # ARABIC LETTER KAF INITIAL FORM - 0x00e4: 0xfedf, # ARABIC LETTER LAM INITIAL FORM - 0x00e5: 0xfee3, # ARABIC LETTER MEEM INITIAL FORM - 0x00e6: 0xfee7, # ARABIC LETTER NOON INITIAL FORM - 0x00e7: 0xfeeb, # ARABIC LETTER HEH INITIAL FORM - 0x00e8: 0xfeed, # ARABIC LETTER WAW ISOLATED FORM - 0x00e9: 0xfeef, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM - 0x00ea: 0xfef3, # ARABIC LETTER YEH INITIAL FORM - 0x00eb: 0xfebd, # ARABIC LETTER DAD ISOLATED FORM - 0x00ec: 0xfecc, # ARABIC LETTER AIN MEDIAL FORM - 0x00ed: 0xfece, # ARABIC LETTER GHAIN FINAL FORM - 0x00ee: 0xfecd, # ARABIC LETTER GHAIN ISOLATED FORM - 0x00ef: 0xfee1, # ARABIC LETTER MEEM ISOLATED FORM - 0x00f0: 0xfe7d, # ARABIC SHADDA MEDIAL FORM - 0x00f1: 0x0651, # ARABIC SHADDAH - 0x00f2: 0xfee5, # ARABIC LETTER NOON ISOLATED FORM - 0x00f3: 0xfee9, # ARABIC LETTER HEH ISOLATED FORM - 0x00f4: 0xfeec, # ARABIC LETTER HEH MEDIAL FORM - 0x00f5: 0xfef0, # ARABIC LETTER ALEF MAKSURA FINAL FORM - 0x00f6: 0xfef2, # ARABIC LETTER YEH FINAL FORM - 0x00f7: 0xfed0, # ARABIC LETTER GHAIN MEDIAL FORM - 0x00f8: 0xfed5, # ARABIC LETTER QAF ISOLATED FORM - 0x00f9: 0xfef5, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - 0x00fa: 0xfef6, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - 0x00fb: 0xfedd, # ARABIC LETTER LAM ISOLATED FORM - 0x00fc: 0xfed9, # ARABIC LETTER KAF ISOLATED FORM - 0x00fd: 0xfef1, # ARABIC LETTER YEH ISOLATED FORM - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: None, # UNDEFINED -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'\u066a' # 0x0025 -> ARABIC PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xb0' # 0x0080 -> DEGREE SIGN - u'\xb7' # 0x0081 -> MIDDLE DOT - u'\u2219' # 0x0082 -> BULLET OPERATOR - u'\u221a' # 0x0083 -> SQUARE ROOT - u'\u2592' # 0x0084 -> MEDIUM SHADE - u'\u2500' # 0x0085 -> FORMS LIGHT HORIZONTAL - u'\u2502' # 0x0086 -> FORMS LIGHT VERTICAL - u'\u253c' # 0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL - u'\u2524' # 0x0088 -> FORMS LIGHT VERTICAL AND LEFT - u'\u252c' # 0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x008a -> FORMS LIGHT VERTICAL AND RIGHT - u'\u2534' # 0x008b -> FORMS LIGHT UP AND HORIZONTAL - u'\u2510' # 0x008c -> FORMS LIGHT DOWN AND LEFT - u'\u250c' # 0x008d -> FORMS LIGHT DOWN AND RIGHT - u'\u2514' # 0x008e -> FORMS LIGHT UP AND RIGHT - u'\u2518' # 0x008f -> FORMS LIGHT UP AND LEFT - u'\u03b2' # 0x0090 -> GREEK SMALL BETA - u'\u221e' # 0x0091 -> INFINITY - u'\u03c6' # 0x0092 -> GREEK SMALL PHI - u'\xb1' # 0x0093 -> PLUS-OR-MINUS SIGN - u'\xbd' # 0x0094 -> FRACTION 1/2 - u'\xbc' # 0x0095 -> FRACTION 1/4 - u'\u2248' # 0x0096 -> ALMOST EQUAL TO - u'\xab' # 0x0097 -> LEFT POINTING GUILLEMET - u'\xbb' # 0x0098 -> RIGHT POINTING GUILLEMET - u'\ufef7' # 0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - u'\ufef8' # 0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - u'\ufffe' # 0x009b -> UNDEFINED - u'\ufffe' # 0x009c -> UNDEFINED - u'\ufefb' # 0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - u'\ufefc' # 0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM - u'\ufffe' # 0x009f -> UNDEFINED - u'\xa0' # 0x00a0 -> NON-BREAKING SPACE - u'\xad' # 0x00a1 -> SOFT HYPHEN - u'\ufe82' # 0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - u'\xa3' # 0x00a3 -> POUND SIGN - u'\xa4' # 0x00a4 -> CURRENCY SIGN - u'\ufe84' # 0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - u'\ufffe' # 0x00a6 -> UNDEFINED - u'\ufffe' # 0x00a7 -> UNDEFINED - u'\ufe8e' # 0x00a8 -> ARABIC LETTER ALEF FINAL FORM - u'\ufe8f' # 0x00a9 -> ARABIC LETTER BEH ISOLATED FORM - u'\ufe95' # 0x00aa -> ARABIC LETTER TEH ISOLATED FORM - u'\ufe99' # 0x00ab -> ARABIC LETTER THEH ISOLATED FORM - u'\u060c' # 0x00ac -> ARABIC COMMA - u'\ufe9d' # 0x00ad -> ARABIC LETTER JEEM ISOLATED FORM - u'\ufea1' # 0x00ae -> ARABIC LETTER HAH ISOLATED FORM - u'\ufea5' # 0x00af -> ARABIC LETTER KHAH ISOLATED FORM - u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO - u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE - u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO - u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE - u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR - u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE - u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX - u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN - u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT - u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE - u'\ufed1' # 0x00ba -> ARABIC LETTER FEH ISOLATED FORM - u'\u061b' # 0x00bb -> ARABIC SEMICOLON - u'\ufeb1' # 0x00bc -> ARABIC LETTER SEEN ISOLATED FORM - u'\ufeb5' # 0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM - u'\ufeb9' # 0x00be -> ARABIC LETTER SAD ISOLATED FORM - u'\u061f' # 0x00bf -> ARABIC QUESTION MARK - u'\xa2' # 0x00c0 -> CENT SIGN - u'\ufe80' # 0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM - u'\ufe81' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - u'\ufe83' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - u'\ufe85' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - u'\ufeca' # 0x00c5 -> ARABIC LETTER AIN FINAL FORM - u'\ufe8b' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - u'\ufe8d' # 0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM - u'\ufe91' # 0x00c8 -> ARABIC LETTER BEH INITIAL FORM - u'\ufe93' # 0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM - u'\ufe97' # 0x00ca -> ARABIC LETTER TEH INITIAL FORM - u'\ufe9b' # 0x00cb -> ARABIC LETTER THEH INITIAL FORM - u'\ufe9f' # 0x00cc -> ARABIC LETTER JEEM INITIAL FORM - u'\ufea3' # 0x00cd -> ARABIC LETTER HAH INITIAL FORM - u'\ufea7' # 0x00ce -> ARABIC LETTER KHAH INITIAL FORM - u'\ufea9' # 0x00cf -> ARABIC LETTER DAL ISOLATED FORM - u'\ufeab' # 0x00d0 -> ARABIC LETTER THAL ISOLATED FORM - u'\ufead' # 0x00d1 -> ARABIC LETTER REH ISOLATED FORM - u'\ufeaf' # 0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM - u'\ufeb3' # 0x00d3 -> ARABIC LETTER SEEN INITIAL FORM - u'\ufeb7' # 0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM - u'\ufebb' # 0x00d5 -> ARABIC LETTER SAD INITIAL FORM - u'\ufebf' # 0x00d6 -> ARABIC LETTER DAD INITIAL FORM - u'\ufec1' # 0x00d7 -> ARABIC LETTER TAH ISOLATED FORM - u'\ufec5' # 0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM - u'\ufecb' # 0x00d9 -> ARABIC LETTER AIN INITIAL FORM - u'\ufecf' # 0x00da -> ARABIC LETTER GHAIN INITIAL FORM - u'\xa6' # 0x00db -> BROKEN VERTICAL BAR - u'\xac' # 0x00dc -> NOT SIGN - u'\xf7' # 0x00dd -> DIVISION SIGN - u'\xd7' # 0x00de -> MULTIPLICATION SIGN - u'\ufec9' # 0x00df -> ARABIC LETTER AIN ISOLATED FORM - u'\u0640' # 0x00e0 -> ARABIC TATWEEL - u'\ufed3' # 0x00e1 -> ARABIC LETTER FEH INITIAL FORM - u'\ufed7' # 0x00e2 -> ARABIC LETTER QAF INITIAL FORM - u'\ufedb' # 0x00e3 -> ARABIC LETTER KAF INITIAL FORM - u'\ufedf' # 0x00e4 -> ARABIC LETTER LAM INITIAL FORM - u'\ufee3' # 0x00e5 -> ARABIC LETTER MEEM INITIAL FORM - u'\ufee7' # 0x00e6 -> ARABIC LETTER NOON INITIAL FORM - u'\ufeeb' # 0x00e7 -> ARABIC LETTER HEH INITIAL FORM - u'\ufeed' # 0x00e8 -> ARABIC LETTER WAW ISOLATED FORM - u'\ufeef' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM - u'\ufef3' # 0x00ea -> ARABIC LETTER YEH INITIAL FORM - u'\ufebd' # 0x00eb -> ARABIC LETTER DAD ISOLATED FORM - u'\ufecc' # 0x00ec -> ARABIC LETTER AIN MEDIAL FORM - u'\ufece' # 0x00ed -> ARABIC LETTER GHAIN FINAL FORM - u'\ufecd' # 0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM - u'\ufee1' # 0x00ef -> ARABIC LETTER MEEM ISOLATED FORM - u'\ufe7d' # 0x00f0 -> ARABIC SHADDA MEDIAL FORM - u'\u0651' # 0x00f1 -> ARABIC SHADDAH - u'\ufee5' # 0x00f2 -> ARABIC LETTER NOON ISOLATED FORM - u'\ufee9' # 0x00f3 -> ARABIC LETTER HEH ISOLATED FORM - u'\ufeec' # 0x00f4 -> ARABIC LETTER HEH MEDIAL FORM - u'\ufef0' # 0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM - u'\ufef2' # 0x00f6 -> ARABIC LETTER YEH FINAL FORM - u'\ufed0' # 0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM - u'\ufed5' # 0x00f8 -> ARABIC LETTER QAF ISOLATED FORM - u'\ufef5' # 0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - u'\ufef6' # 0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - u'\ufedd' # 0x00fb -> ARABIC LETTER LAM ISOLATED FORM - u'\ufed9' # 0x00fc -> ARABIC LETTER KAF ISOLATED FORM - u'\ufef1' # 0x00fd -> ARABIC LETTER YEH ISOLATED FORM - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\ufffe' # 0x00ff -> UNDEFINED -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00a0, # NON-BREAKING SPACE - 0x00a2: 0x00c0, # CENT SIGN - 0x00a3: 0x00a3, # POUND SIGN - 0x00a4: 0x00a4, # CURRENCY SIGN - 0x00a6: 0x00db, # BROKEN VERTICAL BAR - 0x00ab: 0x0097, # LEFT POINTING GUILLEMET - 0x00ac: 0x00dc, # NOT SIGN - 0x00ad: 0x00a1, # SOFT HYPHEN - 0x00b0: 0x0080, # DEGREE SIGN - 0x00b1: 0x0093, # PLUS-OR-MINUS SIGN - 0x00b7: 0x0081, # MIDDLE DOT - 0x00bb: 0x0098, # RIGHT POINTING GUILLEMET - 0x00bc: 0x0095, # FRACTION 1/4 - 0x00bd: 0x0094, # FRACTION 1/2 - 0x00d7: 0x00de, # MULTIPLICATION SIGN - 0x00f7: 0x00dd, # DIVISION SIGN - 0x03b2: 0x0090, # GREEK SMALL BETA - 0x03c6: 0x0092, # GREEK SMALL PHI - 0x060c: 0x00ac, # ARABIC COMMA - 0x061b: 0x00bb, # ARABIC SEMICOLON - 0x061f: 0x00bf, # ARABIC QUESTION MARK - 0x0640: 0x00e0, # ARABIC TATWEEL - 0x0651: 0x00f1, # ARABIC SHADDAH - 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO - 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE - 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO - 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE - 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR - 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE - 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX - 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN - 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT - 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE - 0x066a: 0x0025, # ARABIC PERCENT SIGN - 0x2219: 0x0082, # BULLET OPERATOR - 0x221a: 0x0083, # SQUARE ROOT - 0x221e: 0x0091, # INFINITY - 0x2248: 0x0096, # ALMOST EQUAL TO - 0x2500: 0x0085, # FORMS LIGHT HORIZONTAL - 0x2502: 0x0086, # FORMS LIGHT VERTICAL - 0x250c: 0x008d, # FORMS LIGHT DOWN AND RIGHT - 0x2510: 0x008c, # FORMS LIGHT DOWN AND LEFT - 0x2514: 0x008e, # FORMS LIGHT UP AND RIGHT - 0x2518: 0x008f, # FORMS LIGHT UP AND LEFT - 0x251c: 0x008a, # FORMS LIGHT VERTICAL AND RIGHT - 0x2524: 0x0088, # FORMS LIGHT VERTICAL AND LEFT - 0x252c: 0x0089, # FORMS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x008b, # FORMS LIGHT UP AND HORIZONTAL - 0x253c: 0x0087, # FORMS LIGHT VERTICAL AND HORIZONTAL - 0x2592: 0x0084, # MEDIUM SHADE - 0x25a0: 0x00fe, # BLACK SQUARE - 0xfe7d: 0x00f0, # ARABIC SHADDA MEDIAL FORM - 0xfe80: 0x00c1, # ARABIC LETTER HAMZA ISOLATED FORM - 0xfe81: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM - 0xfe82: 0x00a2, # ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM - 0xfe83: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM - 0xfe84: 0x00a5, # ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM - 0xfe85: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM - 0xfe8b: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM - 0xfe8d: 0x00c7, # ARABIC LETTER ALEF ISOLATED FORM - 0xfe8e: 0x00a8, # ARABIC LETTER ALEF FINAL FORM - 0xfe8f: 0x00a9, # ARABIC LETTER BEH ISOLATED FORM - 0xfe91: 0x00c8, # ARABIC LETTER BEH INITIAL FORM - 0xfe93: 0x00c9, # ARABIC LETTER TEH MARBUTA ISOLATED FORM - 0xfe95: 0x00aa, # ARABIC LETTER TEH ISOLATED FORM - 0xfe97: 0x00ca, # ARABIC LETTER TEH INITIAL FORM - 0xfe99: 0x00ab, # ARABIC LETTER THEH ISOLATED FORM - 0xfe9b: 0x00cb, # ARABIC LETTER THEH INITIAL FORM - 0xfe9d: 0x00ad, # ARABIC LETTER JEEM ISOLATED FORM - 0xfe9f: 0x00cc, # ARABIC LETTER JEEM INITIAL FORM - 0xfea1: 0x00ae, # ARABIC LETTER HAH ISOLATED FORM - 0xfea3: 0x00cd, # ARABIC LETTER HAH INITIAL FORM - 0xfea5: 0x00af, # ARABIC LETTER KHAH ISOLATED FORM - 0xfea7: 0x00ce, # ARABIC LETTER KHAH INITIAL FORM - 0xfea9: 0x00cf, # ARABIC LETTER DAL ISOLATED FORM - 0xfeab: 0x00d0, # ARABIC LETTER THAL ISOLATED FORM - 0xfead: 0x00d1, # ARABIC LETTER REH ISOLATED FORM - 0xfeaf: 0x00d2, # ARABIC LETTER ZAIN ISOLATED FORM - 0xfeb1: 0x00bc, # ARABIC LETTER SEEN ISOLATED FORM - 0xfeb3: 0x00d3, # ARABIC LETTER SEEN INITIAL FORM - 0xfeb5: 0x00bd, # ARABIC LETTER SHEEN ISOLATED FORM - 0xfeb7: 0x00d4, # ARABIC LETTER SHEEN INITIAL FORM - 0xfeb9: 0x00be, # ARABIC LETTER SAD ISOLATED FORM - 0xfebb: 0x00d5, # ARABIC LETTER SAD INITIAL FORM - 0xfebd: 0x00eb, # ARABIC LETTER DAD ISOLATED FORM - 0xfebf: 0x00d6, # ARABIC LETTER DAD INITIAL FORM - 0xfec1: 0x00d7, # ARABIC LETTER TAH ISOLATED FORM - 0xfec5: 0x00d8, # ARABIC LETTER ZAH ISOLATED FORM - 0xfec9: 0x00df, # ARABIC LETTER AIN ISOLATED FORM - 0xfeca: 0x00c5, # ARABIC LETTER AIN FINAL FORM - 0xfecb: 0x00d9, # ARABIC LETTER AIN INITIAL FORM - 0xfecc: 0x00ec, # ARABIC LETTER AIN MEDIAL FORM - 0xfecd: 0x00ee, # ARABIC LETTER GHAIN ISOLATED FORM - 0xfece: 0x00ed, # ARABIC LETTER GHAIN FINAL FORM - 0xfecf: 0x00da, # ARABIC LETTER GHAIN INITIAL FORM - 0xfed0: 0x00f7, # ARABIC LETTER GHAIN MEDIAL FORM - 0xfed1: 0x00ba, # ARABIC LETTER FEH ISOLATED FORM - 0xfed3: 0x00e1, # ARABIC LETTER FEH INITIAL FORM - 0xfed5: 0x00f8, # ARABIC LETTER QAF ISOLATED FORM - 0xfed7: 0x00e2, # ARABIC LETTER QAF INITIAL FORM - 0xfed9: 0x00fc, # ARABIC LETTER KAF ISOLATED FORM - 0xfedb: 0x00e3, # ARABIC LETTER KAF INITIAL FORM - 0xfedd: 0x00fb, # ARABIC LETTER LAM ISOLATED FORM - 0xfedf: 0x00e4, # ARABIC LETTER LAM INITIAL FORM - 0xfee1: 0x00ef, # ARABIC LETTER MEEM ISOLATED FORM - 0xfee3: 0x00e5, # ARABIC LETTER MEEM INITIAL FORM - 0xfee5: 0x00f2, # ARABIC LETTER NOON ISOLATED FORM - 0xfee7: 0x00e6, # ARABIC LETTER NOON INITIAL FORM - 0xfee9: 0x00f3, # ARABIC LETTER HEH ISOLATED FORM - 0xfeeb: 0x00e7, # ARABIC LETTER HEH INITIAL FORM - 0xfeec: 0x00f4, # ARABIC LETTER HEH MEDIAL FORM - 0xfeed: 0x00e8, # ARABIC LETTER WAW ISOLATED FORM - 0xfeef: 0x00e9, # ARABIC LETTER ALEF MAKSURA ISOLATED FORM - 0xfef0: 0x00f5, # ARABIC LETTER ALEF MAKSURA FINAL FORM - 0xfef1: 0x00fd, # ARABIC LETTER YEH ISOLATED FORM - 0xfef2: 0x00f6, # ARABIC LETTER YEH FINAL FORM - 0xfef3: 0x00ea, # ARABIC LETTER YEH INITIAL FORM - 0xfef5: 0x00f9, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM - 0xfef6: 0x00fa, # ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM - 0xfef7: 0x0099, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM - 0xfef8: 0x009a, # ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM - 0xfefb: 0x009d, # ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM - 0xfefc: 0x009e, # ARABIC LIGATURE LAM WITH ALEF FINAL FORM -}
--- a/test/lib/python2.7/encodings/cp865.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP865.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp865', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0081: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x0082: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x0083: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x0084: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x0085: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0086: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x0087: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x0088: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0089: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x008a: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x008b: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x008c: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x008d: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x008e: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x008f: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x0090: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0091: 0x00e6, # LATIN SMALL LIGATURE AE - 0x0092: 0x00c6, # LATIN CAPITAL LIGATURE AE - 0x0093: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x0094: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x0095: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x0096: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x0097: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x0098: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0099: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x009a: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x009b: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x009e: 0x20a7, # PESETA SIGN - 0x009f: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00a0: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00a1: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00a2: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00a3: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00a4: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00a5: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00a6: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00a7: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00a8: 0x00bf, # INVERTED QUESTION MARK - 0x00a9: 0x2310, # REVERSED NOT SIGN - 0x00aa: 0x00ac, # NOT SIGN - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00ad: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00a4, # CURRENCY SIGN - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00e1: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00e2: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00e3: 0x03c0, # GREEK SMALL LETTER PI - 0x00e4: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00e5: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00e6: 0x00b5, # MICRO SIGN - 0x00e7: 0x03c4, # GREEK SMALL LETTER TAU - 0x00e8: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00e9: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ea: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00eb: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00ec: 0x221e, # INFINITY - 0x00ed: 0x03c6, # GREEK SMALL LETTER PHI - 0x00ee: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00ef: 0x2229, # INTERSECTION - 0x00f0: 0x2261, # IDENTICAL TO - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00f3: 0x2264, # LESS-THAN OR EQUAL TO - 0x00f4: 0x2320, # TOP HALF INTEGRAL - 0x00f5: 0x2321, # BOTTOM HALF INTEGRAL - 0x00f6: 0x00f7, # DIVISION SIGN - 0x00f7: 0x2248, # ALMOST EQUAL TO - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x207f, # SUPERSCRIPT LATIN SMALL LETTER N - 0x00fd: 0x00b2, # SUPERSCRIPT TWO - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\xc7' # 0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xfc' # 0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xe9' # 0x0082 -> LATIN SMALL LETTER E WITH ACUTE - u'\xe2' # 0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe0' # 0x0085 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe5' # 0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x0087 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xea' # 0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xe8' # 0x008a -> LATIN SMALL LETTER E WITH GRAVE - u'\xef' # 0x008b -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xee' # 0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xec' # 0x008d -> LATIN SMALL LETTER I WITH GRAVE - u'\xc4' # 0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc9' # 0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xe6' # 0x0091 -> LATIN SMALL LIGATURE AE - u'\xc6' # 0x0092 -> LATIN CAPITAL LIGATURE AE - u'\xf4' # 0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf2' # 0x0095 -> LATIN SMALL LETTER O WITH GRAVE - u'\xfb' # 0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xf9' # 0x0097 -> LATIN SMALL LETTER U WITH GRAVE - u'\xff' # 0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\xd6' # 0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xf8' # 0x009b -> LATIN SMALL LETTER O WITH STROKE - u'\xa3' # 0x009c -> POUND SIGN - u'\xd8' # 0x009d -> LATIN CAPITAL LETTER O WITH STROKE - u'\u20a7' # 0x009e -> PESETA SIGN - u'\u0192' # 0x009f -> LATIN SMALL LETTER F WITH HOOK - u'\xe1' # 0x00a0 -> LATIN SMALL LETTER A WITH ACUTE - u'\xed' # 0x00a1 -> LATIN SMALL LETTER I WITH ACUTE - u'\xf3' # 0x00a2 -> LATIN SMALL LETTER O WITH ACUTE - u'\xfa' # 0x00a3 -> LATIN SMALL LETTER U WITH ACUTE - u'\xf1' # 0x00a4 -> LATIN SMALL LETTER N WITH TILDE - u'\xd1' # 0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xaa' # 0x00a6 -> FEMININE ORDINAL INDICATOR - u'\xba' # 0x00a7 -> MASCULINE ORDINAL INDICATOR - u'\xbf' # 0x00a8 -> INVERTED QUESTION MARK - u'\u2310' # 0x00a9 -> REVERSED NOT SIGN - u'\xac' # 0x00aa -> NOT SIGN - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\xbc' # 0x00ac -> VULGAR FRACTION ONE QUARTER - u'\xa1' # 0x00ad -> INVERTED EXCLAMATION MARK - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xa4' # 0x00af -> CURRENCY SIGN - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b1' # 0x00e0 -> GREEK SMALL LETTER ALPHA - u'\xdf' # 0x00e1 -> LATIN SMALL LETTER SHARP S - u'\u0393' # 0x00e2 -> GREEK CAPITAL LETTER GAMMA - u'\u03c0' # 0x00e3 -> GREEK SMALL LETTER PI - u'\u03a3' # 0x00e4 -> GREEK CAPITAL LETTER SIGMA - u'\u03c3' # 0x00e5 -> GREEK SMALL LETTER SIGMA - u'\xb5' # 0x00e6 -> MICRO SIGN - u'\u03c4' # 0x00e7 -> GREEK SMALL LETTER TAU - u'\u03a6' # 0x00e8 -> GREEK CAPITAL LETTER PHI - u'\u0398' # 0x00e9 -> GREEK CAPITAL LETTER THETA - u'\u03a9' # 0x00ea -> GREEK CAPITAL LETTER OMEGA - u'\u03b4' # 0x00eb -> GREEK SMALL LETTER DELTA - u'\u221e' # 0x00ec -> INFINITY - u'\u03c6' # 0x00ed -> GREEK SMALL LETTER PHI - u'\u03b5' # 0x00ee -> GREEK SMALL LETTER EPSILON - u'\u2229' # 0x00ef -> INTERSECTION - u'\u2261' # 0x00f0 -> IDENTICAL TO - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u2265' # 0x00f2 -> GREATER-THAN OR EQUAL TO - u'\u2264' # 0x00f3 -> LESS-THAN OR EQUAL TO - u'\u2320' # 0x00f4 -> TOP HALF INTEGRAL - u'\u2321' # 0x00f5 -> BOTTOM HALF INTEGRAL - u'\xf7' # 0x00f6 -> DIVISION SIGN - u'\u2248' # 0x00f7 -> ALMOST EQUAL TO - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u207f' # 0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N - u'\xb2' # 0x00fd -> SUPERSCRIPT TWO - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a1: 0x00ad, # INVERTED EXCLAMATION MARK - 0x00a3: 0x009c, # POUND SIGN - 0x00a4: 0x00af, # CURRENCY SIGN - 0x00aa: 0x00a6, # FEMININE ORDINAL INDICATOR - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x00aa, # NOT SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x00fd, # SUPERSCRIPT TWO - 0x00b5: 0x00e6, # MICRO SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x00ba: 0x00a7, # MASCULINE ORDINAL INDICATOR - 0x00bc: 0x00ac, # VULGAR FRACTION ONE QUARTER - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x00bf: 0x00a8, # INVERTED QUESTION MARK - 0x00c4: 0x008e, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c5: 0x008f, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00c6: 0x0092, # LATIN CAPITAL LIGATURE AE - 0x00c7: 0x0080, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0090, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d1: 0x00a5, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d6: 0x0099, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00d8: 0x009d, # LATIN CAPITAL LETTER O WITH STROKE - 0x00dc: 0x009a, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00df: 0x00e1, # LATIN SMALL LETTER SHARP S - 0x00e0: 0x0085, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x00a0, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0083, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x0084, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e5: 0x0086, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00e6: 0x0091, # LATIN SMALL LIGATURE AE - 0x00e7: 0x0087, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008a, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x0082, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0088, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0089, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ec: 0x008d, # LATIN SMALL LETTER I WITH GRAVE - 0x00ed: 0x00a1, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x008c, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x008b, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x00a4, # LATIN SMALL LETTER N WITH TILDE - 0x00f2: 0x0095, # LATIN SMALL LETTER O WITH GRAVE - 0x00f3: 0x00a2, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0093, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x0094, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x00f6, # DIVISION SIGN - 0x00f8: 0x009b, # LATIN SMALL LETTER O WITH STROKE - 0x00f9: 0x0097, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x00a3, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x0096, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x0081, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00ff: 0x0098, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x0192: 0x009f, # LATIN SMALL LETTER F WITH HOOK - 0x0393: 0x00e2, # GREEK CAPITAL LETTER GAMMA - 0x0398: 0x00e9, # GREEK CAPITAL LETTER THETA - 0x03a3: 0x00e4, # GREEK CAPITAL LETTER SIGMA - 0x03a6: 0x00e8, # GREEK CAPITAL LETTER PHI - 0x03a9: 0x00ea, # GREEK CAPITAL LETTER OMEGA - 0x03b1: 0x00e0, # GREEK SMALL LETTER ALPHA - 0x03b4: 0x00eb, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00ee, # GREEK SMALL LETTER EPSILON - 0x03c0: 0x00e3, # GREEK SMALL LETTER PI - 0x03c3: 0x00e5, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00e7, # GREEK SMALL LETTER TAU - 0x03c6: 0x00ed, # GREEK SMALL LETTER PHI - 0x207f: 0x00fc, # SUPERSCRIPT LATIN SMALL LETTER N - 0x20a7: 0x009e, # PESETA SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x221e: 0x00ec, # INFINITY - 0x2229: 0x00ef, # INTERSECTION - 0x2248: 0x00f7, # ALMOST EQUAL TO - 0x2261: 0x00f0, # IDENTICAL TO - 0x2264: 0x00f3, # LESS-THAN OR EQUAL TO - 0x2265: 0x00f2, # GREATER-THAN OR EQUAL TO - 0x2310: 0x00a9, # REVERSED NOT SIGN - 0x2320: 0x00f4, # TOP HALF INTEGRAL - 0x2321: 0x00f5, # BOTTOM HALF INTEGRAL - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp866.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP866.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp866', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x0081: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x0082: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x0083: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x0084: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x0085: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x0086: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x0087: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x0088: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x0089: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x008a: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x008b: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x008c: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x008d: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x008e: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x008f: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x0090: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x0091: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x0092: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x0093: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x0094: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x0095: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x0096: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x0097: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x0098: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x0099: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x009a: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x009b: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x009c: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x009d: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x009e: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x009f: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00a0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00a1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00a2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00a3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00a4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00a5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00a6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00a7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00a8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00a9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00aa: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00ab: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ac: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ad: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ae: 0x043e, # CYRILLIC SMALL LETTER O - 0x00af: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x2561, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x00b6: 0x2562, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x00b7: 0x2556, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x00b8: 0x2555, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x255c, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x00be: 0x255b, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x255e, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x00c7: 0x255f, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x2567, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x00d0: 0x2568, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x00d1: 0x2564, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x00d2: 0x2565, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x00d3: 0x2559, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x00d4: 0x2558, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x00d5: 0x2552, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x00d6: 0x2553, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x00d7: 0x256b, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x00d8: 0x256a, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x258c, # LEFT HALF BLOCK - 0x00de: 0x2590, # RIGHT HALF BLOCK - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00e1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00e2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00e3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00e4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00e5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00e6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00e7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00e8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00e9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00ea: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00eb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00ec: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00ed: 0x044d, # CYRILLIC SMALL LETTER E - 0x00ee: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ef: 0x044f, # CYRILLIC SMALL LETTER YA - 0x00f0: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00f1: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00f2: 0x0404, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x00f3: 0x0454, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x00f4: 0x0407, # CYRILLIC CAPITAL LETTER YI - 0x00f5: 0x0457, # CYRILLIC SMALL LETTER YI - 0x00f6: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U - 0x00f7: 0x045e, # CYRILLIC SMALL LETTER SHORT U - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x2219, # BULLET OPERATOR - 0x00fa: 0x00b7, # MIDDLE DOT - 0x00fb: 0x221a, # SQUARE ROOT - 0x00fc: 0x2116, # NUMERO SIGN - 0x00fd: 0x00a4, # CURRENCY SIGN - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\u0410' # 0x0080 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0x0081 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0x0082 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0x0083 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0x0084 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0x0085 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0x0086 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0x0087 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0x0088 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0x0089 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0x008a -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0x008b -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0x008c -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0x008d -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0x008e -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0x008f -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0x0090 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0x0091 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0x0092 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0x0093 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0x0094 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0x0095 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0x0096 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0x0097 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0x0098 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0x0099 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0x009b -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0x009d -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0x009e -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0x009f -> CYRILLIC CAPITAL LETTER YA - u'\u0430' # 0x00a0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0x00a1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0x00a2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0x00a3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0x00a4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0x00a5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0x00a6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0x00a7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0x00a8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0x00a9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0x00aa -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0x00ab -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0x00ac -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0x00ad -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0x00ae -> CYRILLIC SMALL LETTER O - u'\u043f' # 0x00af -> CYRILLIC SMALL LETTER PE - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u2561' # 0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u2562' # 0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2556' # 0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2555' # 0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255c' # 0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255b' # 0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u255e' # 0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u2567' # 0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2564' # 0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2559' # 0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u2558' # 0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2552' # 0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u2553' # 0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u256b' # 0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256a' # 0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u258c' # 0x00dd -> LEFT HALF BLOCK - u'\u2590' # 0x00de -> RIGHT HALF BLOCK - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u0440' # 0x00e0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0x00e1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0x00e2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0x00e3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0x00e4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0x00e5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0x00e6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0x00e7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0x00e8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0x00e9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0x00ea -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0x00eb -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0x00ed -> CYRILLIC SMALL LETTER E - u'\u044e' # 0x00ee -> CYRILLIC SMALL LETTER YU - u'\u044f' # 0x00ef -> CYRILLIC SMALL LETTER YA - u'\u0401' # 0x00f0 -> CYRILLIC CAPITAL LETTER IO - u'\u0451' # 0x00f1 -> CYRILLIC SMALL LETTER IO - u'\u0404' # 0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0454' # 0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0407' # 0x00f4 -> CYRILLIC CAPITAL LETTER YI - u'\u0457' # 0x00f5 -> CYRILLIC SMALL LETTER YI - u'\u040e' # 0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045e' # 0x00f7 -> CYRILLIC SMALL LETTER SHORT U - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\u2219' # 0x00f9 -> BULLET OPERATOR - u'\xb7' # 0x00fa -> MIDDLE DOT - u'\u221a' # 0x00fb -> SQUARE ROOT - u'\u2116' # 0x00fc -> NUMERO SIGN - u'\xa4' # 0x00fd -> CURRENCY SIGN - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a4: 0x00fd, # CURRENCY SIGN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b7: 0x00fa, # MIDDLE DOT - 0x0401: 0x00f0, # CYRILLIC CAPITAL LETTER IO - 0x0404: 0x00f2, # CYRILLIC CAPITAL LETTER UKRAINIAN IE - 0x0407: 0x00f4, # CYRILLIC CAPITAL LETTER YI - 0x040e: 0x00f6, # CYRILLIC CAPITAL LETTER SHORT U - 0x0410: 0x0080, # CYRILLIC CAPITAL LETTER A - 0x0411: 0x0081, # CYRILLIC CAPITAL LETTER BE - 0x0412: 0x0082, # CYRILLIC CAPITAL LETTER VE - 0x0413: 0x0083, # CYRILLIC CAPITAL LETTER GHE - 0x0414: 0x0084, # CYRILLIC CAPITAL LETTER DE - 0x0415: 0x0085, # CYRILLIC CAPITAL LETTER IE - 0x0416: 0x0086, # CYRILLIC CAPITAL LETTER ZHE - 0x0417: 0x0087, # CYRILLIC CAPITAL LETTER ZE - 0x0418: 0x0088, # CYRILLIC CAPITAL LETTER I - 0x0419: 0x0089, # CYRILLIC CAPITAL LETTER SHORT I - 0x041a: 0x008a, # CYRILLIC CAPITAL LETTER KA - 0x041b: 0x008b, # CYRILLIC CAPITAL LETTER EL - 0x041c: 0x008c, # CYRILLIC CAPITAL LETTER EM - 0x041d: 0x008d, # CYRILLIC CAPITAL LETTER EN - 0x041e: 0x008e, # CYRILLIC CAPITAL LETTER O - 0x041f: 0x008f, # CYRILLIC CAPITAL LETTER PE - 0x0420: 0x0090, # CYRILLIC CAPITAL LETTER ER - 0x0421: 0x0091, # CYRILLIC CAPITAL LETTER ES - 0x0422: 0x0092, # CYRILLIC CAPITAL LETTER TE - 0x0423: 0x0093, # CYRILLIC CAPITAL LETTER U - 0x0424: 0x0094, # CYRILLIC CAPITAL LETTER EF - 0x0425: 0x0095, # CYRILLIC CAPITAL LETTER HA - 0x0426: 0x0096, # CYRILLIC CAPITAL LETTER TSE - 0x0427: 0x0097, # CYRILLIC CAPITAL LETTER CHE - 0x0428: 0x0098, # CYRILLIC CAPITAL LETTER SHA - 0x0429: 0x0099, # CYRILLIC CAPITAL LETTER SHCHA - 0x042a: 0x009a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x042b: 0x009b, # CYRILLIC CAPITAL LETTER YERU - 0x042c: 0x009c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x042d: 0x009d, # CYRILLIC CAPITAL LETTER E - 0x042e: 0x009e, # CYRILLIC CAPITAL LETTER YU - 0x042f: 0x009f, # CYRILLIC CAPITAL LETTER YA - 0x0430: 0x00a0, # CYRILLIC SMALL LETTER A - 0x0431: 0x00a1, # CYRILLIC SMALL LETTER BE - 0x0432: 0x00a2, # CYRILLIC SMALL LETTER VE - 0x0433: 0x00a3, # CYRILLIC SMALL LETTER GHE - 0x0434: 0x00a4, # CYRILLIC SMALL LETTER DE - 0x0435: 0x00a5, # CYRILLIC SMALL LETTER IE - 0x0436: 0x00a6, # CYRILLIC SMALL LETTER ZHE - 0x0437: 0x00a7, # CYRILLIC SMALL LETTER ZE - 0x0438: 0x00a8, # CYRILLIC SMALL LETTER I - 0x0439: 0x00a9, # CYRILLIC SMALL LETTER SHORT I - 0x043a: 0x00aa, # CYRILLIC SMALL LETTER KA - 0x043b: 0x00ab, # CYRILLIC SMALL LETTER EL - 0x043c: 0x00ac, # CYRILLIC SMALL LETTER EM - 0x043d: 0x00ad, # CYRILLIC SMALL LETTER EN - 0x043e: 0x00ae, # CYRILLIC SMALL LETTER O - 0x043f: 0x00af, # CYRILLIC SMALL LETTER PE - 0x0440: 0x00e0, # CYRILLIC SMALL LETTER ER - 0x0441: 0x00e1, # CYRILLIC SMALL LETTER ES - 0x0442: 0x00e2, # CYRILLIC SMALL LETTER TE - 0x0443: 0x00e3, # CYRILLIC SMALL LETTER U - 0x0444: 0x00e4, # CYRILLIC SMALL LETTER EF - 0x0445: 0x00e5, # CYRILLIC SMALL LETTER HA - 0x0446: 0x00e6, # CYRILLIC SMALL LETTER TSE - 0x0447: 0x00e7, # CYRILLIC SMALL LETTER CHE - 0x0448: 0x00e8, # CYRILLIC SMALL LETTER SHA - 0x0449: 0x00e9, # CYRILLIC SMALL LETTER SHCHA - 0x044a: 0x00ea, # CYRILLIC SMALL LETTER HARD SIGN - 0x044b: 0x00eb, # CYRILLIC SMALL LETTER YERU - 0x044c: 0x00ec, # CYRILLIC SMALL LETTER SOFT SIGN - 0x044d: 0x00ed, # CYRILLIC SMALL LETTER E - 0x044e: 0x00ee, # CYRILLIC SMALL LETTER YU - 0x044f: 0x00ef, # CYRILLIC SMALL LETTER YA - 0x0451: 0x00f1, # CYRILLIC SMALL LETTER IO - 0x0454: 0x00f3, # CYRILLIC SMALL LETTER UKRAINIAN IE - 0x0457: 0x00f5, # CYRILLIC SMALL LETTER YI - 0x045e: 0x00f7, # CYRILLIC SMALL LETTER SHORT U - 0x2116: 0x00fc, # NUMERO SIGN - 0x2219: 0x00f9, # BULLET OPERATOR - 0x221a: 0x00fb, # SQUARE ROOT - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2552: 0x00d5, # BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - 0x2553: 0x00d6, # BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2555: 0x00b8, # BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - 0x2556: 0x00b7, # BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x2558: 0x00d4, # BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - 0x2559: 0x00d3, # BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255b: 0x00be, # BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - 0x255c: 0x00bd, # BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x255e: 0x00c6, # BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - 0x255f: 0x00c7, # BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2561: 0x00b5, # BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - 0x2562: 0x00b6, # BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2564: 0x00d1, # BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - 0x2565: 0x00d2, # BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2567: 0x00cf, # BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - 0x2568: 0x00d0, # BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256a: 0x00d8, # BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - 0x256b: 0x00d7, # BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x258c: 0x00dd, # LEFT HALF BLOCK - 0x2590: 0x00de, # RIGHT HALF BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp869.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,689 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/MICSFT/PC/CP869.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp869', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: None, # UNDEFINED - 0x0081: None, # UNDEFINED - 0x0082: None, # UNDEFINED - 0x0083: None, # UNDEFINED - 0x0084: None, # UNDEFINED - 0x0085: None, # UNDEFINED - 0x0086: 0x0386, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0087: None, # UNDEFINED - 0x0088: 0x00b7, # MIDDLE DOT - 0x0089: 0x00ac, # NOT SIGN - 0x008a: 0x00a6, # BROKEN BAR - 0x008b: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x008c: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x008d: 0x0388, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x008e: 0x2015, # HORIZONTAL BAR - 0x008f: 0x0389, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x0090: 0x038a, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x0091: 0x03aa, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x0092: 0x038c, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x0093: None, # UNDEFINED - 0x0094: None, # UNDEFINED - 0x0095: 0x038e, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x0096: 0x03ab, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x0097: 0x00a9, # COPYRIGHT SIGN - 0x0098: 0x038f, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0099: 0x00b2, # SUPERSCRIPT TWO - 0x009a: 0x00b3, # SUPERSCRIPT THREE - 0x009b: 0x03ac, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x009c: 0x00a3, # POUND SIGN - 0x009d: 0x03ad, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x009e: 0x03ae, # GREEK SMALL LETTER ETA WITH TONOS - 0x009f: 0x03af, # GREEK SMALL LETTER IOTA WITH TONOS - 0x00a0: 0x03ca, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x00a1: 0x0390, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x00a2: 0x03cc, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x00a3: 0x03cd, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x00a4: 0x0391, # GREEK CAPITAL LETTER ALPHA - 0x00a5: 0x0392, # GREEK CAPITAL LETTER BETA - 0x00a6: 0x0393, # GREEK CAPITAL LETTER GAMMA - 0x00a7: 0x0394, # GREEK CAPITAL LETTER DELTA - 0x00a8: 0x0395, # GREEK CAPITAL LETTER EPSILON - 0x00a9: 0x0396, # GREEK CAPITAL LETTER ZETA - 0x00aa: 0x0397, # GREEK CAPITAL LETTER ETA - 0x00ab: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00ac: 0x0398, # GREEK CAPITAL LETTER THETA - 0x00ad: 0x0399, # GREEK CAPITAL LETTER IOTA - 0x00ae: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00af: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00b0: 0x2591, # LIGHT SHADE - 0x00b1: 0x2592, # MEDIUM SHADE - 0x00b2: 0x2593, # DARK SHADE - 0x00b3: 0x2502, # BOX DRAWINGS LIGHT VERTICAL - 0x00b4: 0x2524, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x00b5: 0x039a, # GREEK CAPITAL LETTER KAPPA - 0x00b6: 0x039b, # GREEK CAPITAL LETTER LAMDA - 0x00b7: 0x039c, # GREEK CAPITAL LETTER MU - 0x00b8: 0x039d, # GREEK CAPITAL LETTER NU - 0x00b9: 0x2563, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x00ba: 0x2551, # BOX DRAWINGS DOUBLE VERTICAL - 0x00bb: 0x2557, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x00bc: 0x255d, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x00bd: 0x039e, # GREEK CAPITAL LETTER XI - 0x00be: 0x039f, # GREEK CAPITAL LETTER OMICRON - 0x00bf: 0x2510, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x00c0: 0x2514, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x00c1: 0x2534, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x00c2: 0x252c, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x00c3: 0x251c, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x00c4: 0x2500, # BOX DRAWINGS LIGHT HORIZONTAL - 0x00c5: 0x253c, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x00c6: 0x03a0, # GREEK CAPITAL LETTER PI - 0x00c7: 0x03a1, # GREEK CAPITAL LETTER RHO - 0x00c8: 0x255a, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x00c9: 0x2554, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x00ca: 0x2569, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x00cb: 0x2566, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x00cc: 0x2560, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x00cd: 0x2550, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x00ce: 0x256c, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x00cf: 0x03a3, # GREEK CAPITAL LETTER SIGMA - 0x00d0: 0x03a4, # GREEK CAPITAL LETTER TAU - 0x00d1: 0x03a5, # GREEK CAPITAL LETTER UPSILON - 0x00d2: 0x03a6, # GREEK CAPITAL LETTER PHI - 0x00d3: 0x03a7, # GREEK CAPITAL LETTER CHI - 0x00d4: 0x03a8, # GREEK CAPITAL LETTER PSI - 0x00d5: 0x03a9, # GREEK CAPITAL LETTER OMEGA - 0x00d6: 0x03b1, # GREEK SMALL LETTER ALPHA - 0x00d7: 0x03b2, # GREEK SMALL LETTER BETA - 0x00d8: 0x03b3, # GREEK SMALL LETTER GAMMA - 0x00d9: 0x2518, # BOX DRAWINGS LIGHT UP AND LEFT - 0x00da: 0x250c, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x00db: 0x2588, # FULL BLOCK - 0x00dc: 0x2584, # LOWER HALF BLOCK - 0x00dd: 0x03b4, # GREEK SMALL LETTER DELTA - 0x00de: 0x03b5, # GREEK SMALL LETTER EPSILON - 0x00df: 0x2580, # UPPER HALF BLOCK - 0x00e0: 0x03b6, # GREEK SMALL LETTER ZETA - 0x00e1: 0x03b7, # GREEK SMALL LETTER ETA - 0x00e2: 0x03b8, # GREEK SMALL LETTER THETA - 0x00e3: 0x03b9, # GREEK SMALL LETTER IOTA - 0x00e4: 0x03ba, # GREEK SMALL LETTER KAPPA - 0x00e5: 0x03bb, # GREEK SMALL LETTER LAMDA - 0x00e6: 0x03bc, # GREEK SMALL LETTER MU - 0x00e7: 0x03bd, # GREEK SMALL LETTER NU - 0x00e8: 0x03be, # GREEK SMALL LETTER XI - 0x00e9: 0x03bf, # GREEK SMALL LETTER OMICRON - 0x00ea: 0x03c0, # GREEK SMALL LETTER PI - 0x00eb: 0x03c1, # GREEK SMALL LETTER RHO - 0x00ec: 0x03c3, # GREEK SMALL LETTER SIGMA - 0x00ed: 0x03c2, # GREEK SMALL LETTER FINAL SIGMA - 0x00ee: 0x03c4, # GREEK SMALL LETTER TAU - 0x00ef: 0x0384, # GREEK TONOS - 0x00f0: 0x00ad, # SOFT HYPHEN - 0x00f1: 0x00b1, # PLUS-MINUS SIGN - 0x00f2: 0x03c5, # GREEK SMALL LETTER UPSILON - 0x00f3: 0x03c6, # GREEK SMALL LETTER PHI - 0x00f4: 0x03c7, # GREEK SMALL LETTER CHI - 0x00f5: 0x00a7, # SECTION SIGN - 0x00f6: 0x03c8, # GREEK SMALL LETTER PSI - 0x00f7: 0x0385, # GREEK DIALYTIKA TONOS - 0x00f8: 0x00b0, # DEGREE SIGN - 0x00f9: 0x00a8, # DIAERESIS - 0x00fa: 0x03c9, # GREEK SMALL LETTER OMEGA - 0x00fb: 0x03cb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x00fc: 0x03b0, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x00fd: 0x03ce, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x00fe: 0x25a0, # BLACK SQUARE - 0x00ff: 0x00a0, # NO-BREAK SPACE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> NULL - u'\x01' # 0x0001 -> START OF HEADING - u'\x02' # 0x0002 -> START OF TEXT - u'\x03' # 0x0003 -> END OF TEXT - u'\x04' # 0x0004 -> END OF TRANSMISSION - u'\x05' # 0x0005 -> ENQUIRY - u'\x06' # 0x0006 -> ACKNOWLEDGE - u'\x07' # 0x0007 -> BELL - u'\x08' # 0x0008 -> BACKSPACE - u'\t' # 0x0009 -> HORIZONTAL TABULATION - u'\n' # 0x000a -> LINE FEED - u'\x0b' # 0x000b -> VERTICAL TABULATION - u'\x0c' # 0x000c -> FORM FEED - u'\r' # 0x000d -> CARRIAGE RETURN - u'\x0e' # 0x000e -> SHIFT OUT - u'\x0f' # 0x000f -> SHIFT IN - u'\x10' # 0x0010 -> DATA LINK ESCAPE - u'\x11' # 0x0011 -> DEVICE CONTROL ONE - u'\x12' # 0x0012 -> DEVICE CONTROL TWO - u'\x13' # 0x0013 -> DEVICE CONTROL THREE - u'\x14' # 0x0014 -> DEVICE CONTROL FOUR - u'\x15' # 0x0015 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x0016 -> SYNCHRONOUS IDLE - u'\x17' # 0x0017 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x0018 -> CANCEL - u'\x19' # 0x0019 -> END OF MEDIUM - u'\x1a' # 0x001a -> SUBSTITUTE - u'\x1b' # 0x001b -> ESCAPE - u'\x1c' # 0x001c -> FILE SEPARATOR - u'\x1d' # 0x001d -> GROUP SEPARATOR - u'\x1e' # 0x001e -> RECORD SEPARATOR - u'\x1f' # 0x001f -> UNIT SEPARATOR - u' ' # 0x0020 -> SPACE - u'!' # 0x0021 -> EXCLAMATION MARK - u'"' # 0x0022 -> QUOTATION MARK - u'#' # 0x0023 -> NUMBER SIGN - u'$' # 0x0024 -> DOLLAR SIGN - u'%' # 0x0025 -> PERCENT SIGN - u'&' # 0x0026 -> AMPERSAND - u"'" # 0x0027 -> APOSTROPHE - u'(' # 0x0028 -> LEFT PARENTHESIS - u')' # 0x0029 -> RIGHT PARENTHESIS - u'*' # 0x002a -> ASTERISK - u'+' # 0x002b -> PLUS SIGN - u',' # 0x002c -> COMMA - u'-' # 0x002d -> HYPHEN-MINUS - u'.' # 0x002e -> FULL STOP - u'/' # 0x002f -> SOLIDUS - u'0' # 0x0030 -> DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE - u':' # 0x003a -> COLON - u';' # 0x003b -> SEMICOLON - u'<' # 0x003c -> LESS-THAN SIGN - u'=' # 0x003d -> EQUALS SIGN - u'>' # 0x003e -> GREATER-THAN SIGN - u'?' # 0x003f -> QUESTION MARK - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET - u'\\' # 0x005c -> REVERSE SOLIDUS - u']' # 0x005d -> RIGHT SQUARE BRACKET - u'^' # 0x005e -> CIRCUMFLEX ACCENT - u'_' # 0x005f -> LOW LINE - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET - u'|' # 0x007c -> VERTICAL LINE - u'}' # 0x007d -> RIGHT CURLY BRACKET - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> DELETE - u'\ufffe' # 0x0080 -> UNDEFINED - u'\ufffe' # 0x0081 -> UNDEFINED - u'\ufffe' # 0x0082 -> UNDEFINED - u'\ufffe' # 0x0083 -> UNDEFINED - u'\ufffe' # 0x0084 -> UNDEFINED - u'\ufffe' # 0x0085 -> UNDEFINED - u'\u0386' # 0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\ufffe' # 0x0087 -> UNDEFINED - u'\xb7' # 0x0088 -> MIDDLE DOT - u'\xac' # 0x0089 -> NOT SIGN - u'\xa6' # 0x008a -> BROKEN BAR - u'\u2018' # 0x008b -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x008c -> RIGHT SINGLE QUOTATION MARK - u'\u0388' # 0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u2015' # 0x008e -> HORIZONTAL BAR - u'\u0389' # 0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u03aa' # 0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u038c' # 0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\ufffe' # 0x0093 -> UNDEFINED - u'\ufffe' # 0x0094 -> UNDEFINED - u'\u038e' # 0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u03ab' # 0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\xa9' # 0x0097 -> COPYRIGHT SIGN - u'\u038f' # 0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\xb2' # 0x0099 -> SUPERSCRIPT TWO - u'\xb3' # 0x009a -> SUPERSCRIPT THREE - u'\u03ac' # 0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\xa3' # 0x009c -> POUND SIGN - u'\u03ad' # 0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0x009e -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0x009f -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03ca' # 0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u0390' # 0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u03cc' # 0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u0391' # 0x00a4 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0x00a5 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0x00a6 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0x00a7 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0x00a8 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0x00a9 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0x00aa -> GREEK CAPITAL LETTER ETA - u'\xbd' # 0x00ab -> VULGAR FRACTION ONE HALF - u'\u0398' # 0x00ac -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0x00ad -> GREEK CAPITAL LETTER IOTA - u'\xab' # 0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2591' # 0x00b0 -> LIGHT SHADE - u'\u2592' # 0x00b1 -> MEDIUM SHADE - u'\u2593' # 0x00b2 -> DARK SHADE - u'\u2502' # 0x00b3 -> BOX DRAWINGS LIGHT VERTICAL - u'\u2524' # 0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u039a' # 0x00b5 -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0x00b6 -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0x00b7 -> GREEK CAPITAL LETTER MU - u'\u039d' # 0x00b8 -> GREEK CAPITAL LETTER NU - u'\u2563' # 0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2551' # 0x00ba -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2557' # 0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u255d' # 0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u039e' # 0x00bd -> GREEK CAPITAL LETTER XI - u'\u039f' # 0x00be -> GREEK CAPITAL LETTER OMICRON - u'\u2510' # 0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2534' # 0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u252c' # 0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u251c' # 0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2500' # 0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u253c' # 0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u03a0' # 0x00c6 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0x00c7 -> GREEK CAPITAL LETTER RHO - u'\u255a' # 0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u2554' # 0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2569' # 0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u2566' # 0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2560' # 0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2550' # 0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u256c' # 0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\u03a3' # 0x00cf -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0x00d0 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0x00d1 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0x00d2 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0x00d3 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0x00d4 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0x00d5 -> GREEK CAPITAL LETTER OMEGA - u'\u03b1' # 0x00d6 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0x00d7 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0x00d8 -> GREEK SMALL LETTER GAMMA - u'\u2518' # 0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u250c' # 0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2588' # 0x00db -> FULL BLOCK - u'\u2584' # 0x00dc -> LOWER HALF BLOCK - u'\u03b4' # 0x00dd -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0x00de -> GREEK SMALL LETTER EPSILON - u'\u2580' # 0x00df -> UPPER HALF BLOCK - u'\u03b6' # 0x00e0 -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0x00e1 -> GREEK SMALL LETTER ETA - u'\u03b8' # 0x00e2 -> GREEK SMALL LETTER THETA - u'\u03b9' # 0x00e3 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0x00e4 -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0x00e5 -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0x00e6 -> GREEK SMALL LETTER MU - u'\u03bd' # 0x00e7 -> GREEK SMALL LETTER NU - u'\u03be' # 0x00e8 -> GREEK SMALL LETTER XI - u'\u03bf' # 0x00e9 -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0x00ea -> GREEK SMALL LETTER PI - u'\u03c1' # 0x00eb -> GREEK SMALL LETTER RHO - u'\u03c3' # 0x00ec -> GREEK SMALL LETTER SIGMA - u'\u03c2' # 0x00ed -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c4' # 0x00ee -> GREEK SMALL LETTER TAU - u'\u0384' # 0x00ef -> GREEK TONOS - u'\xad' # 0x00f0 -> SOFT HYPHEN - u'\xb1' # 0x00f1 -> PLUS-MINUS SIGN - u'\u03c5' # 0x00f2 -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0x00f3 -> GREEK SMALL LETTER PHI - u'\u03c7' # 0x00f4 -> GREEK SMALL LETTER CHI - u'\xa7' # 0x00f5 -> SECTION SIGN - u'\u03c8' # 0x00f6 -> GREEK SMALL LETTER PSI - u'\u0385' # 0x00f7 -> GREEK DIALYTIKA TONOS - u'\xb0' # 0x00f8 -> DEGREE SIGN - u'\xa8' # 0x00f9 -> DIAERESIS - u'\u03c9' # 0x00fa -> GREEK SMALL LETTER OMEGA - u'\u03cb' # 0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03b0' # 0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u03ce' # 0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u25a0' # 0x00fe -> BLACK SQUARE - u'\xa0' # 0x00ff -> NO-BREAK SPACE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # NULL - 0x0001: 0x0001, # START OF HEADING - 0x0002: 0x0002, # START OF TEXT - 0x0003: 0x0003, # END OF TEXT - 0x0004: 0x0004, # END OF TRANSMISSION - 0x0005: 0x0005, # ENQUIRY - 0x0006: 0x0006, # ACKNOWLEDGE - 0x0007: 0x0007, # BELL - 0x0008: 0x0008, # BACKSPACE - 0x0009: 0x0009, # HORIZONTAL TABULATION - 0x000a: 0x000a, # LINE FEED - 0x000b: 0x000b, # VERTICAL TABULATION - 0x000c: 0x000c, # FORM FEED - 0x000d: 0x000d, # CARRIAGE RETURN - 0x000e: 0x000e, # SHIFT OUT - 0x000f: 0x000f, # SHIFT IN - 0x0010: 0x0010, # DATA LINK ESCAPE - 0x0011: 0x0011, # DEVICE CONTROL ONE - 0x0012: 0x0012, # DEVICE CONTROL TWO - 0x0013: 0x0013, # DEVICE CONTROL THREE - 0x0014: 0x0014, # DEVICE CONTROL FOUR - 0x0015: 0x0015, # NEGATIVE ACKNOWLEDGE - 0x0016: 0x0016, # SYNCHRONOUS IDLE - 0x0017: 0x0017, # END OF TRANSMISSION BLOCK - 0x0018: 0x0018, # CANCEL - 0x0019: 0x0019, # END OF MEDIUM - 0x001a: 0x001a, # SUBSTITUTE - 0x001b: 0x001b, # ESCAPE - 0x001c: 0x001c, # FILE SEPARATOR - 0x001d: 0x001d, # GROUP SEPARATOR - 0x001e: 0x001e, # RECORD SEPARATOR - 0x001f: 0x001f, # UNIT SEPARATOR - 0x0020: 0x0020, # SPACE - 0x0021: 0x0021, # EXCLAMATION MARK - 0x0022: 0x0022, # QUOTATION MARK - 0x0023: 0x0023, # NUMBER SIGN - 0x0024: 0x0024, # DOLLAR SIGN - 0x0025: 0x0025, # PERCENT SIGN - 0x0026: 0x0026, # AMPERSAND - 0x0027: 0x0027, # APOSTROPHE - 0x0028: 0x0028, # LEFT PARENTHESIS - 0x0029: 0x0029, # RIGHT PARENTHESIS - 0x002a: 0x002a, # ASTERISK - 0x002b: 0x002b, # PLUS SIGN - 0x002c: 0x002c, # COMMA - 0x002d: 0x002d, # HYPHEN-MINUS - 0x002e: 0x002e, # FULL STOP - 0x002f: 0x002f, # SOLIDUS - 0x0030: 0x0030, # DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE - 0x003a: 0x003a, # COLON - 0x003b: 0x003b, # SEMICOLON - 0x003c: 0x003c, # LESS-THAN SIGN - 0x003d: 0x003d, # EQUALS SIGN - 0x003e: 0x003e, # GREATER-THAN SIGN - 0x003f: 0x003f, # QUESTION MARK - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET - 0x005c: 0x005c, # REVERSE SOLIDUS - 0x005d: 0x005d, # RIGHT SQUARE BRACKET - 0x005e: 0x005e, # CIRCUMFLEX ACCENT - 0x005f: 0x005f, # LOW LINE - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET - 0x007c: 0x007c, # VERTICAL LINE - 0x007d: 0x007d, # RIGHT CURLY BRACKET - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # DELETE - 0x00a0: 0x00ff, # NO-BREAK SPACE - 0x00a3: 0x009c, # POUND SIGN - 0x00a6: 0x008a, # BROKEN BAR - 0x00a7: 0x00f5, # SECTION SIGN - 0x00a8: 0x00f9, # DIAERESIS - 0x00a9: 0x0097, # COPYRIGHT SIGN - 0x00ab: 0x00ae, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00ac: 0x0089, # NOT SIGN - 0x00ad: 0x00f0, # SOFT HYPHEN - 0x00b0: 0x00f8, # DEGREE SIGN - 0x00b1: 0x00f1, # PLUS-MINUS SIGN - 0x00b2: 0x0099, # SUPERSCRIPT TWO - 0x00b3: 0x009a, # SUPERSCRIPT THREE - 0x00b7: 0x0088, # MIDDLE DOT - 0x00bb: 0x00af, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00bd: 0x00ab, # VULGAR FRACTION ONE HALF - 0x0384: 0x00ef, # GREEK TONOS - 0x0385: 0x00f7, # GREEK DIALYTIKA TONOS - 0x0386: 0x0086, # GREEK CAPITAL LETTER ALPHA WITH TONOS - 0x0388: 0x008d, # GREEK CAPITAL LETTER EPSILON WITH TONOS - 0x0389: 0x008f, # GREEK CAPITAL LETTER ETA WITH TONOS - 0x038a: 0x0090, # GREEK CAPITAL LETTER IOTA WITH TONOS - 0x038c: 0x0092, # GREEK CAPITAL LETTER OMICRON WITH TONOS - 0x038e: 0x0095, # GREEK CAPITAL LETTER UPSILON WITH TONOS - 0x038f: 0x0098, # GREEK CAPITAL LETTER OMEGA WITH TONOS - 0x0390: 0x00a1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - 0x0391: 0x00a4, # GREEK CAPITAL LETTER ALPHA - 0x0392: 0x00a5, # GREEK CAPITAL LETTER BETA - 0x0393: 0x00a6, # GREEK CAPITAL LETTER GAMMA - 0x0394: 0x00a7, # GREEK CAPITAL LETTER DELTA - 0x0395: 0x00a8, # GREEK CAPITAL LETTER EPSILON - 0x0396: 0x00a9, # GREEK CAPITAL LETTER ZETA - 0x0397: 0x00aa, # GREEK CAPITAL LETTER ETA - 0x0398: 0x00ac, # GREEK CAPITAL LETTER THETA - 0x0399: 0x00ad, # GREEK CAPITAL LETTER IOTA - 0x039a: 0x00b5, # GREEK CAPITAL LETTER KAPPA - 0x039b: 0x00b6, # GREEK CAPITAL LETTER LAMDA - 0x039c: 0x00b7, # GREEK CAPITAL LETTER MU - 0x039d: 0x00b8, # GREEK CAPITAL LETTER NU - 0x039e: 0x00bd, # GREEK CAPITAL LETTER XI - 0x039f: 0x00be, # GREEK CAPITAL LETTER OMICRON - 0x03a0: 0x00c6, # GREEK CAPITAL LETTER PI - 0x03a1: 0x00c7, # GREEK CAPITAL LETTER RHO - 0x03a3: 0x00cf, # GREEK CAPITAL LETTER SIGMA - 0x03a4: 0x00d0, # GREEK CAPITAL LETTER TAU - 0x03a5: 0x00d1, # GREEK CAPITAL LETTER UPSILON - 0x03a6: 0x00d2, # GREEK CAPITAL LETTER PHI - 0x03a7: 0x00d3, # GREEK CAPITAL LETTER CHI - 0x03a8: 0x00d4, # GREEK CAPITAL LETTER PSI - 0x03a9: 0x00d5, # GREEK CAPITAL LETTER OMEGA - 0x03aa: 0x0091, # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - 0x03ab: 0x0096, # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - 0x03ac: 0x009b, # GREEK SMALL LETTER ALPHA WITH TONOS - 0x03ad: 0x009d, # GREEK SMALL LETTER EPSILON WITH TONOS - 0x03ae: 0x009e, # GREEK SMALL LETTER ETA WITH TONOS - 0x03af: 0x009f, # GREEK SMALL LETTER IOTA WITH TONOS - 0x03b0: 0x00fc, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - 0x03b1: 0x00d6, # GREEK SMALL LETTER ALPHA - 0x03b2: 0x00d7, # GREEK SMALL LETTER BETA - 0x03b3: 0x00d8, # GREEK SMALL LETTER GAMMA - 0x03b4: 0x00dd, # GREEK SMALL LETTER DELTA - 0x03b5: 0x00de, # GREEK SMALL LETTER EPSILON - 0x03b6: 0x00e0, # GREEK SMALL LETTER ZETA - 0x03b7: 0x00e1, # GREEK SMALL LETTER ETA - 0x03b8: 0x00e2, # GREEK SMALL LETTER THETA - 0x03b9: 0x00e3, # GREEK SMALL LETTER IOTA - 0x03ba: 0x00e4, # GREEK SMALL LETTER KAPPA - 0x03bb: 0x00e5, # GREEK SMALL LETTER LAMDA - 0x03bc: 0x00e6, # GREEK SMALL LETTER MU - 0x03bd: 0x00e7, # GREEK SMALL LETTER NU - 0x03be: 0x00e8, # GREEK SMALL LETTER XI - 0x03bf: 0x00e9, # GREEK SMALL LETTER OMICRON - 0x03c0: 0x00ea, # GREEK SMALL LETTER PI - 0x03c1: 0x00eb, # GREEK SMALL LETTER RHO - 0x03c2: 0x00ed, # GREEK SMALL LETTER FINAL SIGMA - 0x03c3: 0x00ec, # GREEK SMALL LETTER SIGMA - 0x03c4: 0x00ee, # GREEK SMALL LETTER TAU - 0x03c5: 0x00f2, # GREEK SMALL LETTER UPSILON - 0x03c6: 0x00f3, # GREEK SMALL LETTER PHI - 0x03c7: 0x00f4, # GREEK SMALL LETTER CHI - 0x03c8: 0x00f6, # GREEK SMALL LETTER PSI - 0x03c9: 0x00fa, # GREEK SMALL LETTER OMEGA - 0x03ca: 0x00a0, # GREEK SMALL LETTER IOTA WITH DIALYTIKA - 0x03cb: 0x00fb, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA - 0x03cc: 0x00a2, # GREEK SMALL LETTER OMICRON WITH TONOS - 0x03cd: 0x00a3, # GREEK SMALL LETTER UPSILON WITH TONOS - 0x03ce: 0x00fd, # GREEK SMALL LETTER OMEGA WITH TONOS - 0x2015: 0x008e, # HORIZONTAL BAR - 0x2018: 0x008b, # LEFT SINGLE QUOTATION MARK - 0x2019: 0x008c, # RIGHT SINGLE QUOTATION MARK - 0x2500: 0x00c4, # BOX DRAWINGS LIGHT HORIZONTAL - 0x2502: 0x00b3, # BOX DRAWINGS LIGHT VERTICAL - 0x250c: 0x00da, # BOX DRAWINGS LIGHT DOWN AND RIGHT - 0x2510: 0x00bf, # BOX DRAWINGS LIGHT DOWN AND LEFT - 0x2514: 0x00c0, # BOX DRAWINGS LIGHT UP AND RIGHT - 0x2518: 0x00d9, # BOX DRAWINGS LIGHT UP AND LEFT - 0x251c: 0x00c3, # BOX DRAWINGS LIGHT VERTICAL AND RIGHT - 0x2524: 0x00b4, # BOX DRAWINGS LIGHT VERTICAL AND LEFT - 0x252c: 0x00c2, # BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - 0x2534: 0x00c1, # BOX DRAWINGS LIGHT UP AND HORIZONTAL - 0x253c: 0x00c5, # BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - 0x2550: 0x00cd, # BOX DRAWINGS DOUBLE HORIZONTAL - 0x2551: 0x00ba, # BOX DRAWINGS DOUBLE VERTICAL - 0x2554: 0x00c9, # BOX DRAWINGS DOUBLE DOWN AND RIGHT - 0x2557: 0x00bb, # BOX DRAWINGS DOUBLE DOWN AND LEFT - 0x255a: 0x00c8, # BOX DRAWINGS DOUBLE UP AND RIGHT - 0x255d: 0x00bc, # BOX DRAWINGS DOUBLE UP AND LEFT - 0x2560: 0x00cc, # BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - 0x2563: 0x00b9, # BOX DRAWINGS DOUBLE VERTICAL AND LEFT - 0x2566: 0x00cb, # BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - 0x2569: 0x00ca, # BOX DRAWINGS DOUBLE UP AND HORIZONTAL - 0x256c: 0x00ce, # BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - 0x2580: 0x00df, # UPPER HALF BLOCK - 0x2584: 0x00dc, # LOWER HALF BLOCK - 0x2588: 0x00db, # FULL BLOCK - 0x2591: 0x00b0, # LIGHT SHADE - 0x2592: 0x00b1, # MEDIUM SHADE - 0x2593: 0x00b2, # DARK SHADE - 0x25a0: 0x00fe, # BLACK SQUARE -}
--- a/test/lib/python2.7/encodings/cp874.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp874 generated from 'MAPPINGS/VENDORS/MICSFT/WINDOWS/CP874.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp874', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u20ac' # 0x80 -> EURO SIGN - u'\ufffe' # 0x81 -> UNDEFINED - u'\ufffe' # 0x82 -> UNDEFINED - u'\ufffe' # 0x83 -> UNDEFINED - u'\ufffe' # 0x84 -> UNDEFINED - u'\u2026' # 0x85 -> HORIZONTAL ELLIPSIS - u'\ufffe' # 0x86 -> UNDEFINED - u'\ufffe' # 0x87 -> UNDEFINED - u'\ufffe' # 0x88 -> UNDEFINED - u'\ufffe' # 0x89 -> UNDEFINED - u'\ufffe' # 0x8A -> UNDEFINED - u'\ufffe' # 0x8B -> UNDEFINED - u'\ufffe' # 0x8C -> UNDEFINED - u'\ufffe' # 0x8D -> UNDEFINED - u'\ufffe' # 0x8E -> UNDEFINED - u'\ufffe' # 0x8F -> UNDEFINED - u'\ufffe' # 0x90 -> UNDEFINED - u'\u2018' # 0x91 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0x92 -> RIGHT SINGLE QUOTATION MARK - u'\u201c' # 0x93 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0x94 -> RIGHT DOUBLE QUOTATION MARK - u'\u2022' # 0x95 -> BULLET - u'\u2013' # 0x96 -> EN DASH - u'\u2014' # 0x97 -> EM DASH - u'\ufffe' # 0x98 -> UNDEFINED - u'\ufffe' # 0x99 -> UNDEFINED - u'\ufffe' # 0x9A -> UNDEFINED - u'\ufffe' # 0x9B -> UNDEFINED - u'\ufffe' # 0x9C -> UNDEFINED - u'\ufffe' # 0x9D -> UNDEFINED - u'\ufffe' # 0x9E -> UNDEFINED - u'\ufffe' # 0x9F -> UNDEFINED - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING - u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO - u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING - u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN - u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK - u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA - u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN - u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA - u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK - u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xC4 -> THAI CHARACTER RU - u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xC6 -> THAI CHARACTER LU - u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA - u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG - u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II - u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU - u'\ufffe' # 0xDB -> UNDEFINED - u'\ufffe' # 0xDC -> UNDEFINED - u'\ufffe' # 0xDD -> UNDEFINED - u'\ufffe' # 0xDE -> UNDEFINED - u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE - u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN - u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO - u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN - u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN - u'\u0e50' # 0xF0 -> THAI DIGIT ZERO - u'\u0e51' # 0xF1 -> THAI DIGIT ONE - u'\u0e52' # 0xF2 -> THAI DIGIT TWO - u'\u0e53' # 0xF3 -> THAI DIGIT THREE - u'\u0e54' # 0xF4 -> THAI DIGIT FOUR - u'\u0e55' # 0xF5 -> THAI DIGIT FIVE - u'\u0e56' # 0xF6 -> THAI DIGIT SIX - u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xF9 -> THAI DIGIT NINE - u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT - u'\ufffe' # 0xFC -> UNDEFINED - u'\ufffe' # 0xFD -> UNDEFINED - u'\ufffe' # 0xFE -> UNDEFINED - u'\ufffe' # 0xFF -> UNDEFINED -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp875.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec cp875 generated from 'MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='cp875', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x9c' # 0x04 -> CONTROL - u'\t' # 0x05 -> HORIZONTAL TABULATION - u'\x86' # 0x06 -> CONTROL - u'\x7f' # 0x07 -> DELETE - u'\x97' # 0x08 -> CONTROL - u'\x8d' # 0x09 -> CONTROL - u'\x8e' # 0x0A -> CONTROL - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x9d' # 0x14 -> CONTROL - u'\x85' # 0x15 -> CONTROL - u'\x08' # 0x16 -> BACKSPACE - u'\x87' # 0x17 -> CONTROL - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x92' # 0x1A -> CONTROL - u'\x8f' # 0x1B -> CONTROL - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u'\x80' # 0x20 -> CONTROL - u'\x81' # 0x21 -> CONTROL - u'\x82' # 0x22 -> CONTROL - u'\x83' # 0x23 -> CONTROL - u'\x84' # 0x24 -> CONTROL - u'\n' # 0x25 -> LINE FEED - u'\x17' # 0x26 -> END OF TRANSMISSION BLOCK - u'\x1b' # 0x27 -> ESCAPE - u'\x88' # 0x28 -> CONTROL - u'\x89' # 0x29 -> CONTROL - u'\x8a' # 0x2A -> CONTROL - u'\x8b' # 0x2B -> CONTROL - u'\x8c' # 0x2C -> CONTROL - u'\x05' # 0x2D -> ENQUIRY - u'\x06' # 0x2E -> ACKNOWLEDGE - u'\x07' # 0x2F -> BELL - u'\x90' # 0x30 -> CONTROL - u'\x91' # 0x31 -> CONTROL - u'\x16' # 0x32 -> SYNCHRONOUS IDLE - u'\x93' # 0x33 -> CONTROL - u'\x94' # 0x34 -> CONTROL - u'\x95' # 0x35 -> CONTROL - u'\x96' # 0x36 -> CONTROL - u'\x04' # 0x37 -> END OF TRANSMISSION - u'\x98' # 0x38 -> CONTROL - u'\x99' # 0x39 -> CONTROL - u'\x9a' # 0x3A -> CONTROL - u'\x9b' # 0x3B -> CONTROL - u'\x14' # 0x3C -> DEVICE CONTROL FOUR - u'\x15' # 0x3D -> NEGATIVE ACKNOWLEDGE - u'\x9e' # 0x3E -> CONTROL - u'\x1a' # 0x3F -> SUBSTITUTE - u' ' # 0x40 -> SPACE - u'\u0391' # 0x41 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0x42 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0x43 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0x44 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0x45 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0x46 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0x47 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0x48 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0x49 -> GREEK CAPITAL LETTER IOTA - u'[' # 0x4A -> LEFT SQUARE BRACKET - u'.' # 0x4B -> FULL STOP - u'<' # 0x4C -> LESS-THAN SIGN - u'(' # 0x4D -> LEFT PARENTHESIS - u'+' # 0x4E -> PLUS SIGN - u'!' # 0x4F -> EXCLAMATION MARK - u'&' # 0x50 -> AMPERSAND - u'\u039a' # 0x51 -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0x52 -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0x53 -> GREEK CAPITAL LETTER MU - u'\u039d' # 0x54 -> GREEK CAPITAL LETTER NU - u'\u039e' # 0x55 -> GREEK CAPITAL LETTER XI - u'\u039f' # 0x56 -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0x57 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0x58 -> GREEK CAPITAL LETTER RHO - u'\u03a3' # 0x59 -> GREEK CAPITAL LETTER SIGMA - u']' # 0x5A -> RIGHT SQUARE BRACKET - u'$' # 0x5B -> DOLLAR SIGN - u'*' # 0x5C -> ASTERISK - u')' # 0x5D -> RIGHT PARENTHESIS - u';' # 0x5E -> SEMICOLON - u'^' # 0x5F -> CIRCUMFLEX ACCENT - u'-' # 0x60 -> HYPHEN-MINUS - u'/' # 0x61 -> SOLIDUS - u'\u03a4' # 0x62 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0x63 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0x64 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0x65 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0x66 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0x67 -> GREEK CAPITAL LETTER OMEGA - u'\u03aa' # 0x68 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0x69 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'|' # 0x6A -> VERTICAL LINE - u',' # 0x6B -> COMMA - u'%' # 0x6C -> PERCENT SIGN - u'_' # 0x6D -> LOW LINE - u'>' # 0x6E -> GREATER-THAN SIGN - u'?' # 0x6F -> QUESTION MARK - u'\xa8' # 0x70 -> DIAERESIS - u'\u0386' # 0x71 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\u0388' # 0x72 -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0x73 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\xa0' # 0x74 -> NO-BREAK SPACE - u'\u038a' # 0x75 -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u038c' # 0x76 -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\u038e' # 0x77 -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0x78 -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'`' # 0x79 -> GRAVE ACCENT - u':' # 0x7A -> COLON - u'#' # 0x7B -> NUMBER SIGN - u'@' # 0x7C -> COMMERCIAL AT - u"'" # 0x7D -> APOSTROPHE - u'=' # 0x7E -> EQUALS SIGN - u'"' # 0x7F -> QUOTATION MARK - u'\u0385' # 0x80 -> GREEK DIALYTIKA TONOS - u'a' # 0x81 -> LATIN SMALL LETTER A - u'b' # 0x82 -> LATIN SMALL LETTER B - u'c' # 0x83 -> LATIN SMALL LETTER C - u'd' # 0x84 -> LATIN SMALL LETTER D - u'e' # 0x85 -> LATIN SMALL LETTER E - u'f' # 0x86 -> LATIN SMALL LETTER F - u'g' # 0x87 -> LATIN SMALL LETTER G - u'h' # 0x88 -> LATIN SMALL LETTER H - u'i' # 0x89 -> LATIN SMALL LETTER I - u'\u03b1' # 0x8A -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0x8B -> GREEK SMALL LETTER BETA - u'\u03b3' # 0x8C -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0x8D -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0x8E -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0x8F -> GREEK SMALL LETTER ZETA - u'\xb0' # 0x90 -> DEGREE SIGN - u'j' # 0x91 -> LATIN SMALL LETTER J - u'k' # 0x92 -> LATIN SMALL LETTER K - u'l' # 0x93 -> LATIN SMALL LETTER L - u'm' # 0x94 -> LATIN SMALL LETTER M - u'n' # 0x95 -> LATIN SMALL LETTER N - u'o' # 0x96 -> LATIN SMALL LETTER O - u'p' # 0x97 -> LATIN SMALL LETTER P - u'q' # 0x98 -> LATIN SMALL LETTER Q - u'r' # 0x99 -> LATIN SMALL LETTER R - u'\u03b7' # 0x9A -> GREEK SMALL LETTER ETA - u'\u03b8' # 0x9B -> GREEK SMALL LETTER THETA - u'\u03b9' # 0x9C -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0x9D -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0x9E -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0x9F -> GREEK SMALL LETTER MU - u'\xb4' # 0xA0 -> ACUTE ACCENT - u'~' # 0xA1 -> TILDE - u's' # 0xA2 -> LATIN SMALL LETTER S - u't' # 0xA3 -> LATIN SMALL LETTER T - u'u' # 0xA4 -> LATIN SMALL LETTER U - u'v' # 0xA5 -> LATIN SMALL LETTER V - u'w' # 0xA6 -> LATIN SMALL LETTER W - u'x' # 0xA7 -> LATIN SMALL LETTER X - u'y' # 0xA8 -> LATIN SMALL LETTER Y - u'z' # 0xA9 -> LATIN SMALL LETTER Z - u'\u03bd' # 0xAA -> GREEK SMALL LETTER NU - u'\u03be' # 0xAB -> GREEK SMALL LETTER XI - u'\u03bf' # 0xAC -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xAD -> GREEK SMALL LETTER PI - u'\u03c1' # 0xAE -> GREEK SMALL LETTER RHO - u'\u03c3' # 0xAF -> GREEK SMALL LETTER SIGMA - u'\xa3' # 0xB0 -> POUND SIGN - u'\u03ac' # 0xB1 -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xB2 -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xB3 -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03ca' # 0xB4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03af' # 0xB5 -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03cc' # 0xB6 -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xB7 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03cb' # 0xB8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03ce' # 0xB9 -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u03c2' # 0xBA -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c4' # 0xBB -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xBC -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xBD -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xBE -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xBF -> GREEK SMALL LETTER PSI - u'{' # 0xC0 -> LEFT CURLY BRACKET - u'A' # 0xC1 -> LATIN CAPITAL LETTER A - u'B' # 0xC2 -> LATIN CAPITAL LETTER B - u'C' # 0xC3 -> LATIN CAPITAL LETTER C - u'D' # 0xC4 -> LATIN CAPITAL LETTER D - u'E' # 0xC5 -> LATIN CAPITAL LETTER E - u'F' # 0xC6 -> LATIN CAPITAL LETTER F - u'G' # 0xC7 -> LATIN CAPITAL LETTER G - u'H' # 0xC8 -> LATIN CAPITAL LETTER H - u'I' # 0xC9 -> LATIN CAPITAL LETTER I - u'\xad' # 0xCA -> SOFT HYPHEN - u'\u03c9' # 0xCB -> GREEK SMALL LETTER OMEGA - u'\u0390' # 0xCC -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u03b0' # 0xCD -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u2018' # 0xCE -> LEFT SINGLE QUOTATION MARK - u'\u2015' # 0xCF -> HORIZONTAL BAR - u'}' # 0xD0 -> RIGHT CURLY BRACKET - u'J' # 0xD1 -> LATIN CAPITAL LETTER J - u'K' # 0xD2 -> LATIN CAPITAL LETTER K - u'L' # 0xD3 -> LATIN CAPITAL LETTER L - u'M' # 0xD4 -> LATIN CAPITAL LETTER M - u'N' # 0xD5 -> LATIN CAPITAL LETTER N - u'O' # 0xD6 -> LATIN CAPITAL LETTER O - u'P' # 0xD7 -> LATIN CAPITAL LETTER P - u'Q' # 0xD8 -> LATIN CAPITAL LETTER Q - u'R' # 0xD9 -> LATIN CAPITAL LETTER R - u'\xb1' # 0xDA -> PLUS-MINUS SIGN - u'\xbd' # 0xDB -> VULGAR FRACTION ONE HALF - u'\x1a' # 0xDC -> SUBSTITUTE - u'\u0387' # 0xDD -> GREEK ANO TELEIA - u'\u2019' # 0xDE -> RIGHT SINGLE QUOTATION MARK - u'\xa6' # 0xDF -> BROKEN BAR - u'\\' # 0xE0 -> REVERSE SOLIDUS - u'\x1a' # 0xE1 -> SUBSTITUTE - u'S' # 0xE2 -> LATIN CAPITAL LETTER S - u'T' # 0xE3 -> LATIN CAPITAL LETTER T - u'U' # 0xE4 -> LATIN CAPITAL LETTER U - u'V' # 0xE5 -> LATIN CAPITAL LETTER V - u'W' # 0xE6 -> LATIN CAPITAL LETTER W - u'X' # 0xE7 -> LATIN CAPITAL LETTER X - u'Y' # 0xE8 -> LATIN CAPITAL LETTER Y - u'Z' # 0xE9 -> LATIN CAPITAL LETTER Z - u'\xb2' # 0xEA -> SUPERSCRIPT TWO - u'\xa7' # 0xEB -> SECTION SIGN - u'\x1a' # 0xEC -> SUBSTITUTE - u'\x1a' # 0xED -> SUBSTITUTE - u'\xab' # 0xEE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xEF -> NOT SIGN - u'0' # 0xF0 -> DIGIT ZERO - u'1' # 0xF1 -> DIGIT ONE - u'2' # 0xF2 -> DIGIT TWO - u'3' # 0xF3 -> DIGIT THREE - u'4' # 0xF4 -> DIGIT FOUR - u'5' # 0xF5 -> DIGIT FIVE - u'6' # 0xF6 -> DIGIT SIX - u'7' # 0xF7 -> DIGIT SEVEN - u'8' # 0xF8 -> DIGIT EIGHT - u'9' # 0xF9 -> DIGIT NINE - u'\xb3' # 0xFA -> SUPERSCRIPT THREE - u'\xa9' # 0xFB -> COPYRIGHT SIGN - u'\x1a' # 0xFC -> SUBSTITUTE - u'\x1a' # 0xFD -> SUBSTITUTE - u'\xbb' # 0xFE -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\x9f' # 0xFF -> CONTROL -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/cp932.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# cp932.py: Python Unicode Codec for CP932 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('cp932') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='cp932', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/cp949.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# cp949.py: Python Unicode Codec for CP949 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_kr, codecs -import _multibytecodec as mbc - -codec = _codecs_kr.getcodec('cp949') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='cp949', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/cp950.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# cp950.py: Python Unicode Codec for CP950 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_tw, codecs -import _multibytecodec as mbc - -codec = _codecs_tw.getcodec('cp950') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='cp950', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/euc_jis_2004.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# euc_jis_2004.py: Python Unicode Codec for EUC_JIS_2004 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('euc_jis_2004') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='euc_jis_2004', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/euc_jisx0213.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# euc_jisx0213.py: Python Unicode Codec for EUC_JISX0213 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('euc_jisx0213') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='euc_jisx0213', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/euc_jp.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# euc_jp.py: Python Unicode Codec for EUC_JP -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('euc_jp') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='euc_jp', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/euc_kr.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# euc_kr.py: Python Unicode Codec for EUC_KR -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_kr, codecs -import _multibytecodec as mbc - -codec = _codecs_kr.getcodec('euc_kr') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='euc_kr', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/gb18030.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# gb18030.py: Python Unicode Codec for GB18030 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_cn, codecs -import _multibytecodec as mbc - -codec = _codecs_cn.getcodec('gb18030') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='gb18030', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/gb2312.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# gb2312.py: Python Unicode Codec for GB2312 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_cn, codecs -import _multibytecodec as mbc - -codec = _codecs_cn.getcodec('gb2312') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='gb2312', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/gbk.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# gbk.py: Python Unicode Codec for GBK -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_cn, codecs -import _multibytecodec as mbc - -codec = _codecs_cn.getcodec('gbk') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='gbk', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/hex_codec.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -""" Python 'hex_codec' Codec - 2-digit hex content transfer encoding - - Unlike most of the other codecs which target Unicode, this codec - will return Python string objects for both encode and decode. - - Written by Marc-Andre Lemburg (mal@lemburg.com). - -""" -import codecs, binascii - -### Codec APIs - -def hex_encode(input,errors='strict'): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = binascii.b2a_hex(input) - return (output, len(input)) - -def hex_decode(input,errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = binascii.a2b_hex(input) - return (output, len(input)) - -class Codec(codecs.Codec): - - def encode(self, input,errors='strict'): - return hex_encode(input,errors) - def decode(self, input,errors='strict'): - return hex_decode(input,errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - assert self.errors == 'strict' - return binascii.b2a_hex(input) - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - assert self.errors == 'strict' - return binascii.a2b_hex(input) - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='hex', - encode=hex_encode, - decode=hex_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - _is_text_encoding=False, - )
--- a/test/lib/python2.7/encodings/hp_roman8.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,152 +0,0 @@ -""" Python Character Mapping Codec generated from 'hp_roman8.txt' with gencodec.py. - - Based on data from ftp://dkuug.dk/i18n/charmaps/HP-ROMAN8 (Keld Simonsen) - - Original source: LaserJet IIP Printer User's Manual HP part no - 33471-90901, Hewlet-Packard, June 1989. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_map)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='hp-roman8', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x00a1: 0x00c0, # LATIN CAPITAL LETTER A WITH GRAVE - 0x00a2: 0x00c2, # LATIN CAPITAL LETTER A WITH CIRCUMFLEX - 0x00a3: 0x00c8, # LATIN CAPITAL LETTER E WITH GRAVE - 0x00a4: 0x00ca, # LATIN CAPITAL LETTER E WITH CIRCUMFLEX - 0x00a5: 0x00cb, # LATIN CAPITAL LETTER E WITH DIAERESIS - 0x00a6: 0x00ce, # LATIN CAPITAL LETTER I WITH CIRCUMFLEX - 0x00a7: 0x00cf, # LATIN CAPITAL LETTER I WITH DIAERESIS - 0x00a8: 0x00b4, # ACUTE ACCENT - 0x00a9: 0x02cb, # MODIFIER LETTER GRAVE ACCENT (Mandarin Chinese fourth tone) - 0x00aa: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x00ab: 0x00a8, # DIAERESIS - 0x00ac: 0x02dc, # SMALL TILDE - 0x00ad: 0x00d9, # LATIN CAPITAL LETTER U WITH GRAVE - 0x00ae: 0x00db, # LATIN CAPITAL LETTER U WITH CIRCUMFLEX - 0x00af: 0x20a4, # LIRA SIGN - 0x00b0: 0x00af, # MACRON - 0x00b1: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00b2: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00b3: 0x00b0, # DEGREE SIGN - 0x00b4: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00b5: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x00b6: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x00b7: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x00b8: 0x00a1, # INVERTED EXCLAMATION MARK - 0x00b9: 0x00bf, # INVERTED QUESTION MARK - 0x00ba: 0x00a4, # CURRENCY SIGN - 0x00bb: 0x00a3, # POUND SIGN - 0x00bc: 0x00a5, # YEN SIGN - 0x00bd: 0x00a7, # SECTION SIGN - 0x00be: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x00bf: 0x00a2, # CENT SIGN - 0x00c0: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00c1: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00c2: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00c3: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00c4: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x00c5: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x00c6: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x00c7: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x00c8: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x00c9: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x00ca: 0x00f2, # LATIN SMALL LETTER O WITH GRAVE - 0x00cb: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x00cc: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00cd: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ce: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00cf: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00d0: 0x00c5, # LATIN CAPITAL LETTER A WITH RING ABOVE - 0x00d1: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00d2: 0x00d8, # LATIN CAPITAL LETTER O WITH STROKE - 0x00d3: 0x00c6, # LATIN CAPITAL LETTER AE - 0x00d4: 0x00e5, # LATIN SMALL LETTER A WITH RING ABOVE - 0x00d5: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x00d6: 0x00f8, # LATIN SMALL LETTER O WITH STROKE - 0x00d7: 0x00e6, # LATIN SMALL LETTER AE - 0x00d8: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00d9: 0x00ec, # LATIN SMALL LETTER I WITH GRAVE - 0x00da: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00db: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00dc: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00dd: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00de: 0x00df, # LATIN SMALL LETTER SHARP S (German) - 0x00df: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00e0: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e1: 0x00c3, # LATIN CAPITAL LETTER A WITH TILDE - 0x00e2: 0x00e3, # LATIN SMALL LETTER A WITH TILDE - 0x00e3: 0x00d0, # LATIN CAPITAL LETTER ETH (Icelandic) - 0x00e4: 0x00f0, # LATIN SMALL LETTER ETH (Icelandic) - 0x00e5: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00e6: 0x00cc, # LATIN CAPITAL LETTER I WITH GRAVE - 0x00e7: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00e8: 0x00d2, # LATIN CAPITAL LETTER O WITH GRAVE - 0x00e9: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ea: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x00eb: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00ec: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00ed: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00ee: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS - 0x00ef: 0x00ff, # LATIN SMALL LETTER Y WITH DIAERESIS - 0x00f0: 0x00de, # LATIN CAPITAL LETTER THORN (Icelandic) - 0x00f1: 0x00fe, # LATIN SMALL LETTER THORN (Icelandic) - 0x00f2: 0x00b7, # MIDDLE DOT - 0x00f3: 0x00b5, # MICRO SIGN - 0x00f4: 0x00b6, # PILCROW SIGN - 0x00f5: 0x00be, # VULGAR FRACTION THREE QUARTERS - 0x00f6: 0x2014, # EM DASH - 0x00f7: 0x00bc, # VULGAR FRACTION ONE QUARTER - 0x00f8: 0x00bd, # VULGAR FRACTION ONE HALF - 0x00f9: 0x00aa, # FEMININE ORDINAL INDICATOR - 0x00fa: 0x00ba, # MASCULINE ORDINAL INDICATOR - 0x00fb: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00fc: 0x25a0, # BLACK SQUARE - 0x00fd: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00fe: 0x00b1, # PLUS-MINUS SIGN - 0x00ff: None, -}) - -### Encoding Map - -encoding_map = codecs.make_encoding_map(decoding_map)
--- a/test/lib/python2.7/encodings/hz.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# hz.py: Python Unicode Codec for HZ -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_cn, codecs -import _multibytecodec as mbc - -codec = _codecs_cn.getcodec('hz') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='hz', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/idna.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,288 +0,0 @@ -# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep) - -import stringprep, re, codecs -from unicodedata import ucd_3_2_0 as unicodedata - -# IDNA section 3.1 -dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]") - -# IDNA section 5 -ace_prefix = "xn--" -uace_prefix = unicode(ace_prefix, "ascii") - -# This assumes query strings, so AllowUnassigned is true -def nameprep(label): - # Map - newlabel = [] - for c in label: - if stringprep.in_table_b1(c): - # Map to nothing - continue - newlabel.append(stringprep.map_table_b2(c)) - label = u"".join(newlabel) - - # Normalize - label = unicodedata.normalize("NFKC", label) - - # Prohibit - for c in label: - if stringprep.in_table_c12(c) or \ - stringprep.in_table_c22(c) or \ - stringprep.in_table_c3(c) or \ - stringprep.in_table_c4(c) or \ - stringprep.in_table_c5(c) or \ - stringprep.in_table_c6(c) or \ - stringprep.in_table_c7(c) or \ - stringprep.in_table_c8(c) or \ - stringprep.in_table_c9(c): - raise UnicodeError("Invalid character %r" % c) - - # Check bidi - RandAL = map(stringprep.in_table_d1, label) - for c in RandAL: - if c: - # There is a RandAL char in the string. Must perform further - # tests: - # 1) The characters in section 5.8 MUST be prohibited. - # This is table C.8, which was already checked - # 2) If a string contains any RandALCat character, the string - # MUST NOT contain any LCat character. - if filter(stringprep.in_table_d2, label): - raise UnicodeError("Violation of BIDI requirement 2") - - # 3) If a string contains any RandALCat character, a - # RandALCat character MUST be the first character of the - # string, and a RandALCat character MUST be the last - # character of the string. - if not RandAL[0] or not RandAL[-1]: - raise UnicodeError("Violation of BIDI requirement 3") - - return label - -def ToASCII(label): - try: - # Step 1: try ASCII - label = label.encode("ascii") - except UnicodeError: - pass - else: - # Skip to step 3: UseSTD3ASCIIRules is false, so - # Skip to step 8. - if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") - - # Step 2: nameprep - label = nameprep(label) - - # Step 3: UseSTD3ASCIIRules is false - # Step 4: try ASCII - try: - label = label.encode("ascii") - except UnicodeError: - pass - else: - # Skip to step 8. - if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") - - # Step 5: Check ACE prefix - if label.startswith(uace_prefix): - raise UnicodeError("Label starts with ACE prefix") - - # Step 6: Encode with PUNYCODE - label = label.encode("punycode") - - # Step 7: Prepend ACE prefix - label = ace_prefix + label - - # Step 8: Check size - if 0 < len(label) < 64: - return label - raise UnicodeError("label empty or too long") - -def ToUnicode(label): - # Step 1: Check for ASCII - if isinstance(label, str): - pure_ascii = True - else: - try: - label = label.encode("ascii") - pure_ascii = True - except UnicodeError: - pure_ascii = False - if not pure_ascii: - # Step 2: Perform nameprep - label = nameprep(label) - # It doesn't say this, but apparently, it should be ASCII now - try: - label = label.encode("ascii") - except UnicodeError: - raise UnicodeError("Invalid character in IDN label") - # Step 3: Check for ACE prefix - if not label.startswith(ace_prefix): - return unicode(label, "ascii") - - # Step 4: Remove ACE prefix - label1 = label[len(ace_prefix):] - - # Step 5: Decode using PUNYCODE - result = label1.decode("punycode") - - # Step 6: Apply ToASCII - label2 = ToASCII(result) - - # Step 7: Compare the result of step 6 with the one of step 3 - # label2 will already be in lower case. - if label.lower() != label2: - raise UnicodeError("IDNA does not round-trip", label, label2) - - # Step 8: return the result of step 5 - return result - -### Codec APIs - -class Codec(codecs.Codec): - def encode(self,input,errors='strict'): - - if errors != 'strict': - # IDNA is quite clear that implementations must be strict - raise UnicodeError("unsupported error handling "+errors) - - if not input: - return "", 0 - - result = [] - labels = dots.split(input) - if labels and len(labels[-1])==0: - trailing_dot = '.' - del labels[-1] - else: - trailing_dot = '' - for label in labels: - result.append(ToASCII(label)) - # Join with U+002E - return ".".join(result)+trailing_dot, len(input) - - def decode(self,input,errors='strict'): - - if errors != 'strict': - raise UnicodeError("Unsupported error handling "+errors) - - if not input: - return u"", 0 - - # IDNA allows decoding to operate on Unicode strings, too. - if isinstance(input, unicode): - labels = dots.split(input) - else: - # Must be ASCII string - input = str(input) - unicode(input, "ascii") - labels = input.split(".") - - if labels and len(labels[-1]) == 0: - trailing_dot = u'.' - del labels[-1] - else: - trailing_dot = u'' - - result = [] - for label in labels: - result.append(ToUnicode(label)) - - return u".".join(result)+trailing_dot, len(input) - -class IncrementalEncoder(codecs.BufferedIncrementalEncoder): - def _buffer_encode(self, input, errors, final): - if errors != 'strict': - # IDNA is quite clear that implementations must be strict - raise UnicodeError("unsupported error handling "+errors) - - if not input: - return ("", 0) - - labels = dots.split(input) - trailing_dot = u'' - if labels: - if not labels[-1]: - trailing_dot = '.' - del labels[-1] - elif not final: - # Keep potentially unfinished label until the next call - del labels[-1] - if labels: - trailing_dot = '.' - - result = [] - size = 0 - for label in labels: - result.append(ToASCII(label)) - if size: - size += 1 - size += len(label) - - # Join with U+002E - result = ".".join(result) + trailing_dot - size += len(trailing_dot) - return (result, size) - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def _buffer_decode(self, input, errors, final): - if errors != 'strict': - raise UnicodeError("Unsupported error handling "+errors) - - if not input: - return (u"", 0) - - # IDNA allows decoding to operate on Unicode strings, too. - if isinstance(input, unicode): - labels = dots.split(input) - else: - # Must be ASCII string - input = str(input) - unicode(input, "ascii") - labels = input.split(".") - - trailing_dot = u'' - if labels: - if not labels[-1]: - trailing_dot = u'.' - del labels[-1] - elif not final: - # Keep potentially unfinished label until the next call - del labels[-1] - if labels: - trailing_dot = u'.' - - result = [] - size = 0 - for label in labels: - result.append(ToUnicode(label)) - if size: - size += 1 - size += len(label) - - result = u".".join(result) + trailing_dot - size += len(trailing_dot) - return (result, size) - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='idna', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - )
--- a/test/lib/python2.7/encodings/iso2022_jp.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# iso2022_jp.py: Python Unicode Codec for ISO2022_JP -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/iso2022_jp_1.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# iso2022_jp_1.py: Python Unicode Codec for ISO2022_JP_1 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp_1') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp_1', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/iso2022_jp_2.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# iso2022_jp_2.py: Python Unicode Codec for ISO2022_JP_2 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp_2') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp_2', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/iso2022_jp_2004.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# iso2022_jp_2004.py: Python Unicode Codec for ISO2022_JP_2004 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp_2004') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp_2004', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/iso2022_jp_3.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# iso2022_jp_3.py: Python Unicode Codec for ISO2022_JP_3 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp_3') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp_3', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/iso2022_jp_ext.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# iso2022_jp_ext.py: Python Unicode Codec for ISO2022_JP_EXT -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_jp_ext') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_jp_ext', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/iso2022_kr.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# iso2022_kr.py: Python Unicode Codec for ISO2022_KR -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_iso2022, codecs -import _multibytecodec as mbc - -codec = _codecs_iso2022.getcodec('iso2022_kr') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='iso2022_kr', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/iso8859_1.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_1 generated from 'MAPPINGS/ISO8859/8859-1.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-1', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_10.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_10 generated from 'MAPPINGS/ISO8859/8859-10.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-10', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0112' # 0xA2 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0122' # 0xA3 -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u012a' # 0xA4 -> LATIN CAPITAL LETTER I WITH MACRON - u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE - u'\u0136' # 0xA6 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u013b' # 0xA8 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0110' # 0xA9 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0160' # 0xAA -> LATIN CAPITAL LETTER S WITH CARON - u'\u0166' # 0xAB -> LATIN CAPITAL LETTER T WITH STROKE - u'\u017d' # 0xAC -> LATIN CAPITAL LETTER Z WITH CARON - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u016a' # 0xAE -> LATIN CAPITAL LETTER U WITH MACRON - u'\u014a' # 0xAF -> LATIN CAPITAL LETTER ENG - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK - u'\u0113' # 0xB2 -> LATIN SMALL LETTER E WITH MACRON - u'\u0123' # 0xB3 -> LATIN SMALL LETTER G WITH CEDILLA - u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON - u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE - u'\u0137' # 0xB6 -> LATIN SMALL LETTER K WITH CEDILLA - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u013c' # 0xB8 -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0111' # 0xB9 -> LATIN SMALL LETTER D WITH STROKE - u'\u0161' # 0xBA -> LATIN SMALL LETTER S WITH CARON - u'\u0167' # 0xBB -> LATIN SMALL LETTER T WITH STROKE - u'\u017e' # 0xBC -> LATIN SMALL LETTER Z WITH CARON - u'\u2015' # 0xBD -> HORIZONTAL BAR - u'\u016b' # 0xBE -> LATIN SMALL LETTER U WITH MACRON - u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG - u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic) - u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u0168' # 0xD7 -> LATIN CAPITAL LETTER U WITH TILDE - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN (Icelandic) - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) - u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH (Icelandic) - u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u0169' # 0xF7 -> LATIN SMALL LETTER U WITH TILDE - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN (Icelandic) - u'\u0138' # 0xFF -> LATIN SMALL LETTER KRA -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_11.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_11 generated from 'MAPPINGS/ISO8859/8859-11.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-11', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING - u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO - u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING - u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN - u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK - u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA - u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN - u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA - u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK - u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xC4 -> THAI CHARACTER RU - u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xC6 -> THAI CHARACTER LU - u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA - u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG - u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II - u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE - u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN - u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO - u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN - u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN - u'\u0e50' # 0xF0 -> THAI DIGIT ZERO - u'\u0e51' # 0xF1 -> THAI DIGIT ONE - u'\u0e52' # 0xF2 -> THAI DIGIT TWO - u'\u0e53' # 0xF3 -> THAI DIGIT THREE - u'\u0e54' # 0xF4 -> THAI DIGIT FOUR - u'\u0e55' # 0xF5 -> THAI DIGIT FIVE - u'\u0e56' # 0xF6 -> THAI DIGIT SIX - u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xF9 -> THAI DIGIT NINE - u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_13.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_13 generated from 'MAPPINGS/ISO8859/8859-13.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-13', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u201d' # 0xA1 -> RIGHT DOUBLE QUOTATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xd8' # 0xA8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0156' # 0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xc6' # 0xAF -> LATIN CAPITAL LETTER AE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u201c' # 0xB4 -> LEFT DOUBLE QUOTATION MARK - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xf8' # 0xB8 -> LATIN SMALL LETTER O WITH STROKE - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\u0157' # 0xBA -> LATIN SMALL LETTER R WITH CEDILLA - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xe6' # 0xBF -> LATIN SMALL LETTER AE - u'\u0104' # 0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u012e' # 0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u0100' # 0xC2 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u0106' # 0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\u0118' # 0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u0112' # 0xC7 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0179' # 0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\u0116' # 0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\u0122' # 0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0136' # 0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u012a' # 0xCE -> LATIN CAPITAL LETTER I WITH MACRON - u'\u013b' # 0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u0160' # 0xD0 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0145' # 0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\u014c' # 0xD4 -> LATIN CAPITAL LETTER O WITH MACRON - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0172' # 0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0141' # 0xD9 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u015a' # 0xDA -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u016a' # 0xDB -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u017b' # 0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u017d' # 0xDE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S (German) - u'\u0105' # 0xE0 -> LATIN SMALL LETTER A WITH OGONEK - u'\u012f' # 0xE1 -> LATIN SMALL LETTER I WITH OGONEK - u'\u0101' # 0xE2 -> LATIN SMALL LETTER A WITH MACRON - u'\u0107' # 0xE3 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\u0119' # 0xE6 -> LATIN SMALL LETTER E WITH OGONEK - u'\u0113' # 0xE7 -> LATIN SMALL LETTER E WITH MACRON - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u017a' # 0xEA -> LATIN SMALL LETTER Z WITH ACUTE - u'\u0117' # 0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\u0123' # 0xEC -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0137' # 0xED -> LATIN SMALL LETTER K WITH CEDILLA - u'\u012b' # 0xEE -> LATIN SMALL LETTER I WITH MACRON - u'\u013c' # 0xEF -> LATIN SMALL LETTER L WITH CEDILLA - u'\u0161' # 0xF0 -> LATIN SMALL LETTER S WITH CARON - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0146' # 0xF2 -> LATIN SMALL LETTER N WITH CEDILLA - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\u014d' # 0xF4 -> LATIN SMALL LETTER O WITH MACRON - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0173' # 0xF8 -> LATIN SMALL LETTER U WITH OGONEK - u'\u0142' # 0xF9 -> LATIN SMALL LETTER L WITH STROKE - u'\u015b' # 0xFA -> LATIN SMALL LETTER S WITH ACUTE - u'\u016b' # 0xFB -> LATIN SMALL LETTER U WITH MACRON - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u017e' # 0xFE -> LATIN SMALL LETTER Z WITH CARON - u'\u2019' # 0xFF -> RIGHT SINGLE QUOTATION MARK -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_14.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_14 generated from 'MAPPINGS/ISO8859/8859-14.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-14', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u1e02' # 0xA1 -> LATIN CAPITAL LETTER B WITH DOT ABOVE - u'\u1e03' # 0xA2 -> LATIN SMALL LETTER B WITH DOT ABOVE - u'\xa3' # 0xA3 -> POUND SIGN - u'\u010a' # 0xA4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE - u'\u010b' # 0xA5 -> LATIN SMALL LETTER C WITH DOT ABOVE - u'\u1e0a' # 0xA6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u1e80' # 0xA8 -> LATIN CAPITAL LETTER W WITH GRAVE - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u1e82' # 0xAA -> LATIN CAPITAL LETTER W WITH ACUTE - u'\u1e0b' # 0xAB -> LATIN SMALL LETTER D WITH DOT ABOVE - u'\u1ef2' # 0xAC -> LATIN CAPITAL LETTER Y WITH GRAVE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\u0178' # 0xAF -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u1e1e' # 0xB0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE - u'\u1e1f' # 0xB1 -> LATIN SMALL LETTER F WITH DOT ABOVE - u'\u0120' # 0xB2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE - u'\u0121' # 0xB3 -> LATIN SMALL LETTER G WITH DOT ABOVE - u'\u1e40' # 0xB4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE - u'\u1e41' # 0xB5 -> LATIN SMALL LETTER M WITH DOT ABOVE - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\u1e56' # 0xB7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE - u'\u1e81' # 0xB8 -> LATIN SMALL LETTER W WITH GRAVE - u'\u1e57' # 0xB9 -> LATIN SMALL LETTER P WITH DOT ABOVE - u'\u1e83' # 0xBA -> LATIN SMALL LETTER W WITH ACUTE - u'\u1e60' # 0xBB -> LATIN CAPITAL LETTER S WITH DOT ABOVE - u'\u1ef3' # 0xBC -> LATIN SMALL LETTER Y WITH GRAVE - u'\u1e84' # 0xBD -> LATIN CAPITAL LETTER W WITH DIAERESIS - u'\u1e85' # 0xBE -> LATIN SMALL LETTER W WITH DIAERESIS - u'\u1e61' # 0xBF -> LATIN SMALL LETTER S WITH DOT ABOVE - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0174' # 0xD0 -> LATIN CAPITAL LETTER W WITH CIRCUMFLEX - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u1e6a' # 0xD7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0176' # 0xDE -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0175' # 0xF0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u1e6b' # 0xF7 -> LATIN SMALL LETTER T WITH DOT ABOVE - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0177' # 0xFE -> LATIN SMALL LETTER Y WITH CIRCUMFLEX - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_15.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_15 generated from 'MAPPINGS/ISO8859/8859-15.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-15', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\u20ac' # 0xA4 -> EURO SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE - u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xd0' # 0xD0 -> LATIN CAPITAL LETTER ETH - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf0' # 0xF0 -> LATIN SMALL LETTER ETH - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\xfe' # 0xFE -> LATIN SMALL LETTER THORN - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_16.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_16 generated from 'MAPPINGS/ISO8859/8859-16.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-16', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0105' # 0xA2 -> LATIN SMALL LETTER A WITH OGONEK - u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE - u'\u20ac' # 0xA4 -> EURO SIGN - u'\u201e' # 0xA5 -> DOUBLE LOW-9 QUOTATION MARK - u'\u0160' # 0xA6 -> LATIN CAPITAL LETTER S WITH CARON - u'\xa7' # 0xA7 -> SECTION SIGN - u'\u0161' # 0xA8 -> LATIN SMALL LETTER S WITH CARON - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u0218' # 0xAA -> LATIN CAPITAL LETTER S WITH COMMA BELOW - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u017a' # 0xAE -> LATIN SMALL LETTER Z WITH ACUTE - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u010c' # 0xB2 -> LATIN CAPITAL LETTER C WITH CARON - u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE - u'\u017d' # 0xB4 -> LATIN CAPITAL LETTER Z WITH CARON - u'\u201d' # 0xB5 -> RIGHT DOUBLE QUOTATION MARK - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u017e' # 0xB8 -> LATIN SMALL LETTER Z WITH CARON - u'\u010d' # 0xB9 -> LATIN SMALL LETTER C WITH CARON - u'\u0219' # 0xBA -> LATIN SMALL LETTER S WITH COMMA BELOW - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u0152' # 0xBC -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xBD -> LATIN SMALL LIGATURE OE - u'\u0178' # 0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0106' # 0xC5 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\u015a' # 0xD7 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u0170' # 0xD8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0118' # 0xDD -> LATIN CAPITAL LETTER E WITH OGONEK - u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u0107' # 0xE5 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\u015b' # 0xF7 -> LATIN SMALL LETTER S WITH ACUTE - u'\u0171' # 0xF8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0119' # 0xFD -> LATIN SMALL LETTER E WITH OGONEK - u'\u021b' # 0xFE -> LATIN SMALL LETTER T WITH COMMA BELOW - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_2.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_2 generated from 'MAPPINGS/ISO8859/8859-2.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-2', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u02d8' # 0xA2 -> BREVE - u'\u0141' # 0xA3 -> LATIN CAPITAL LETTER L WITH STROKE - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u013d' # 0xA5 -> LATIN CAPITAL LETTER L WITH CARON - u'\u015a' # 0xA6 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u0164' # 0xAB -> LATIN CAPITAL LETTER T WITH CARON - u'\u0179' # 0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK - u'\u02db' # 0xB2 -> OGONEK - u'\u0142' # 0xB3 -> LATIN SMALL LETTER L WITH STROKE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\u013e' # 0xB5 -> LATIN SMALL LETTER L WITH CARON - u'\u015b' # 0xB6 -> LATIN SMALL LETTER S WITH ACUTE - u'\u02c7' # 0xB7 -> CARON - u'\xb8' # 0xB8 -> CEDILLA - u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON - u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA - u'\u0165' # 0xBB -> LATIN SMALL LETTER T WITH CARON - u'\u017a' # 0xBC -> LATIN SMALL LETTER Z WITH ACUTE - u'\u02dd' # 0xBD -> DOUBLE ACUTE ACCENT - u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0154' # 0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0102' # 0xC3 -> LATIN CAPITAL LETTER A WITH BREVE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0139' # 0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u011a' # 0xCC -> LATIN CAPITAL LETTER E WITH CARON - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u010e' # 0xCF -> LATIN CAPITAL LETTER D WITH CARON - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0143' # 0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\u0147' # 0xD2 -> LATIN CAPITAL LETTER N WITH CARON - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0150' # 0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u0158' # 0xD8 -> LATIN CAPITAL LETTER R WITH CARON - u'\u016e' # 0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u0170' # 0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xdd' # 0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\u0162' # 0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0155' # 0xE0 -> LATIN SMALL LETTER R WITH ACUTE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\u0103' # 0xE3 -> LATIN SMALL LETTER A WITH BREVE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u013a' # 0xE5 -> LATIN SMALL LETTER L WITH ACUTE - u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u011b' # 0xEC -> LATIN SMALL LETTER E WITH CARON - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u010f' # 0xEF -> LATIN SMALL LETTER D WITH CARON - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0144' # 0xF1 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0148' # 0xF2 -> LATIN SMALL LETTER N WITH CARON - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0151' # 0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u0159' # 0xF8 -> LATIN SMALL LETTER R WITH CARON - u'\u016f' # 0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\u0171' # 0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xfd' # 0xFD -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0163' # 0xFE -> LATIN SMALL LETTER T WITH CEDILLA - u'\u02d9' # 0xFF -> DOT ABOVE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_3.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_3 generated from 'MAPPINGS/ISO8859/8859-3.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-3', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0126' # 0xA1 -> LATIN CAPITAL LETTER H WITH STROKE - u'\u02d8' # 0xA2 -> BREVE - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\ufffe' - u'\u0124' # 0xA6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\u0130' # 0xA9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u011e' # 0xAB -> LATIN CAPITAL LETTER G WITH BREVE - u'\u0134' # 0xAC -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX - u'\xad' # 0xAD -> SOFT HYPHEN - u'\ufffe' - u'\u017b' # 0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0127' # 0xB1 -> LATIN SMALL LETTER H WITH STROKE - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u0125' # 0xB6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\u0131' # 0xB9 -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xBA -> LATIN SMALL LETTER S WITH CEDILLA - u'\u011f' # 0xBB -> LATIN SMALL LETTER G WITH BREVE - u'\u0135' # 0xBC -> LATIN SMALL LETTER J WITH CIRCUMFLEX - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\ufffe' - u'\u017c' # 0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\ufffe' - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u010a' # 0xC5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE - u'\u0108' # 0xC6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\ufffe' - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0120' # 0xD5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\u011c' # 0xD8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u016c' # 0xDD -> LATIN CAPITAL LETTER U WITH BREVE - u'\u015c' # 0xDE -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\ufffe' - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u010b' # 0xE5 -> LATIN SMALL LETTER C WITH DOT ABOVE - u'\u0109' # 0xE6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\ufffe' - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\u0121' # 0xF5 -> LATIN SMALL LETTER G WITH DOT ABOVE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\u011d' # 0xF8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u016d' # 0xFD -> LATIN SMALL LETTER U WITH BREVE - u'\u015d' # 0xFE -> LATIN SMALL LETTER S WITH CIRCUMFLEX - u'\u02d9' # 0xFF -> DOT ABOVE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_4.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_4 generated from 'MAPPINGS/ISO8859/8859-4.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-4', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0104' # 0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\u0138' # 0xA2 -> LATIN SMALL LETTER KRA - u'\u0156' # 0xA3 -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\u0128' # 0xA5 -> LATIN CAPITAL LETTER I WITH TILDE - u'\u013b' # 0xA6 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u0112' # 0xAA -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0122' # 0xAB -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u0166' # 0xAC -> LATIN CAPITAL LETTER T WITH STROKE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\u0105' # 0xB1 -> LATIN SMALL LETTER A WITH OGONEK - u'\u02db' # 0xB2 -> OGONEK - u'\u0157' # 0xB3 -> LATIN SMALL LETTER R WITH CEDILLA - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\u0129' # 0xB5 -> LATIN SMALL LETTER I WITH TILDE - u'\u013c' # 0xB6 -> LATIN SMALL LETTER L WITH CEDILLA - u'\u02c7' # 0xB7 -> CARON - u'\xb8' # 0xB8 -> CEDILLA - u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON - u'\u0113' # 0xBA -> LATIN SMALL LETTER E WITH MACRON - u'\u0123' # 0xBB -> LATIN SMALL LETTER G WITH CEDILLA - u'\u0167' # 0xBC -> LATIN SMALL LETTER T WITH STROKE - u'\u014a' # 0xBD -> LATIN CAPITAL LETTER ENG - u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON - u'\u014b' # 0xBF -> LATIN SMALL LETTER ENG - u'\u0100' # 0xC0 -> LATIN CAPITAL LETTER A WITH MACRON - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\u012e' # 0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0118' # 0xCA -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u0116' # 0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\u012a' # 0xCF -> LATIN CAPITAL LETTER I WITH MACRON - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u0145' # 0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u014c' # 0xD2 -> LATIN CAPITAL LETTER O WITH MACRON - u'\u0136' # 0xD3 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\u0172' # 0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0168' # 0xDD -> LATIN CAPITAL LETTER U WITH TILDE - u'\u016a' # 0xDE -> LATIN CAPITAL LETTER U WITH MACRON - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\u0101' # 0xE0 -> LATIN SMALL LETTER A WITH MACRON - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\u012f' # 0xE7 -> LATIN SMALL LETTER I WITH OGONEK - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\u0119' # 0xEA -> LATIN SMALL LETTER E WITH OGONEK - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\u0117' # 0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\u012b' # 0xEF -> LATIN SMALL LETTER I WITH MACRON - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\u0146' # 0xF1 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u014d' # 0xF2 -> LATIN SMALL LETTER O WITH MACRON - u'\u0137' # 0xF3 -> LATIN SMALL LETTER K WITH CEDILLA - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\u0173' # 0xF9 -> LATIN SMALL LETTER U WITH OGONEK - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0169' # 0xFD -> LATIN SMALL LETTER U WITH TILDE - u'\u016b' # 0xFE -> LATIN SMALL LETTER U WITH MACRON - u'\u02d9' # 0xFF -> DOT ABOVE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_5.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_5 generated from 'MAPPINGS/ISO8859/8859-5.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-5', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u0401' # 0xA1 -> CYRILLIC CAPITAL LETTER IO - u'\u0402' # 0xA2 -> CYRILLIC CAPITAL LETTER DJE - u'\u0403' # 0xA3 -> CYRILLIC CAPITAL LETTER GJE - u'\u0404' # 0xA4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0405' # 0xA5 -> CYRILLIC CAPITAL LETTER DZE - u'\u0406' # 0xA6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0407' # 0xA7 -> CYRILLIC CAPITAL LETTER YI - u'\u0408' # 0xA8 -> CYRILLIC CAPITAL LETTER JE - u'\u0409' # 0xA9 -> CYRILLIC CAPITAL LETTER LJE - u'\u040a' # 0xAA -> CYRILLIC CAPITAL LETTER NJE - u'\u040b' # 0xAB -> CYRILLIC CAPITAL LETTER TSHE - u'\u040c' # 0xAC -> CYRILLIC CAPITAL LETTER KJE - u'\xad' # 0xAD -> SOFT HYPHEN - u'\u040e' # 0xAE -> CYRILLIC CAPITAL LETTER SHORT U - u'\u040f' # 0xAF -> CYRILLIC CAPITAL LETTER DZHE - u'\u0410' # 0xB0 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xB1 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0xB2 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0xB3 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0xB4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xB5 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0xB6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0xB7 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0xB8 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xB9 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xBA -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xBB -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xBC -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xBD -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xBE -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xBF -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0xC0 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xC1 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xC2 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xC3 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0xC4 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0xC5 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0xC6 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0xC7 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0xC8 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0xC9 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0xCA -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0xCB -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0xCC -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0xCD -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0xCE -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0xCF -> CYRILLIC CAPITAL LETTER YA - u'\u0430' # 0xD0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xD1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xD2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0xD3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0xD4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xD5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xD7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xD8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xD9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xDA -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xDB -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xDC -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xDD -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xDE -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xDF -> CYRILLIC SMALL LETTER PE - u'\u0440' # 0xE0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xE1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xE2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xE3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xE4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xE5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0xE6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xE7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xE8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xE9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xEA -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0xEB -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xEC -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xED -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xEE -> CYRILLIC SMALL LETTER YU - u'\u044f' # 0xEF -> CYRILLIC SMALL LETTER YA - u'\u2116' # 0xF0 -> NUMERO SIGN - u'\u0451' # 0xF1 -> CYRILLIC SMALL LETTER IO - u'\u0452' # 0xF2 -> CYRILLIC SMALL LETTER DJE - u'\u0453' # 0xF3 -> CYRILLIC SMALL LETTER GJE - u'\u0454' # 0xF4 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0455' # 0xF5 -> CYRILLIC SMALL LETTER DZE - u'\u0456' # 0xF6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0457' # 0xF7 -> CYRILLIC SMALL LETTER YI - u'\u0458' # 0xF8 -> CYRILLIC SMALL LETTER JE - u'\u0459' # 0xF9 -> CYRILLIC SMALL LETTER LJE - u'\u045a' # 0xFA -> CYRILLIC SMALL LETTER NJE - u'\u045b' # 0xFB -> CYRILLIC SMALL LETTER TSHE - u'\u045c' # 0xFC -> CYRILLIC SMALL LETTER KJE - u'\xa7' # 0xFD -> SECTION SIGN - u'\u045e' # 0xFE -> CYRILLIC SMALL LETTER SHORT U - u'\u045f' # 0xFF -> CYRILLIC SMALL LETTER DZHE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_6.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_6 generated from 'MAPPINGS/ISO8859/8859-6.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-6', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u060c' # 0xAC -> ARABIC COMMA - u'\xad' # 0xAD -> SOFT HYPHEN - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u061b' # 0xBB -> ARABIC SEMICOLON - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u061f' # 0xBF -> ARABIC QUESTION MARK - u'\ufffe' - u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xC7 -> ARABIC LETTER ALEF - u'\u0628' # 0xC8 -> ARABIC LETTER BEH - u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xCA -> ARABIC LETTER TEH - u'\u062b' # 0xCB -> ARABIC LETTER THEH - u'\u062c' # 0xCC -> ARABIC LETTER JEEM - u'\u062d' # 0xCD -> ARABIC LETTER HAH - u'\u062e' # 0xCE -> ARABIC LETTER KHAH - u'\u062f' # 0xCF -> ARABIC LETTER DAL - u'\u0630' # 0xD0 -> ARABIC LETTER THAL - u'\u0631' # 0xD1 -> ARABIC LETTER REH - u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xD3 -> ARABIC LETTER SEEN - u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xD5 -> ARABIC LETTER SAD - u'\u0636' # 0xD6 -> ARABIC LETTER DAD - u'\u0637' # 0xD7 -> ARABIC LETTER TAH - u'\u0638' # 0xD8 -> ARABIC LETTER ZAH - u'\u0639' # 0xD9 -> ARABIC LETTER AIN - u'\u063a' # 0xDA -> ARABIC LETTER GHAIN - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u0640' # 0xE0 -> ARABIC TATWEEL - u'\u0641' # 0xE1 -> ARABIC LETTER FEH - u'\u0642' # 0xE2 -> ARABIC LETTER QAF - u'\u0643' # 0xE3 -> ARABIC LETTER KAF - u'\u0644' # 0xE4 -> ARABIC LETTER LAM - u'\u0645' # 0xE5 -> ARABIC LETTER MEEM - u'\u0646' # 0xE6 -> ARABIC LETTER NOON - u'\u0647' # 0xE7 -> ARABIC LETTER HEH - u'\u0648' # 0xE8 -> ARABIC LETTER WAW - u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xEA -> ARABIC LETTER YEH - u'\u064b' # 0xEB -> ARABIC FATHATAN - u'\u064c' # 0xEC -> ARABIC DAMMATAN - u'\u064d' # 0xED -> ARABIC KASRATAN - u'\u064e' # 0xEE -> ARABIC FATHA - u'\u064f' # 0xEF -> ARABIC DAMMA - u'\u0650' # 0xF0 -> ARABIC KASRA - u'\u0651' # 0xF1 -> ARABIC SHADDA - u'\u0652' # 0xF2 -> ARABIC SUKUN - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_7.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_7 generated from 'MAPPINGS/ISO8859/8859-7.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-7', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\u2018' # 0xA1 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xA2 -> RIGHT SINGLE QUOTATION MARK - u'\xa3' # 0xA3 -> POUND SIGN - u'\u20ac' # 0xA4 -> EURO SIGN - u'\u20af' # 0xA5 -> DRACHMA SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u037a' # 0xAA -> GREEK YPOGEGRAMMENI - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\ufffe' - u'\u2015' # 0xAF -> HORIZONTAL BAR - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\u0384' # 0xB4 -> GREEK TONOS - u'\u0385' # 0xB5 -> GREEK DIALYTIKA TONOS - u'\u0386' # 0xB6 -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\u0388' # 0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0389' # 0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u038c' # 0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\u038e' # 0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u038f' # 0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u0390' # 0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u0391' # 0xC1 -> GREEK CAPITAL LETTER ALPHA - u'\u0392' # 0xC2 -> GREEK CAPITAL LETTER BETA - u'\u0393' # 0xC3 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xC4 -> GREEK CAPITAL LETTER DELTA - u'\u0395' # 0xC5 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xC6 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xC7 -> GREEK CAPITAL LETTER ETA - u'\u0398' # 0xC8 -> GREEK CAPITAL LETTER THETA - u'\u0399' # 0xC9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xCA -> GREEK CAPITAL LETTER KAPPA - u'\u039b' # 0xCB -> GREEK CAPITAL LETTER LAMDA - u'\u039c' # 0xCC -> GREEK CAPITAL LETTER MU - u'\u039d' # 0xCD -> GREEK CAPITAL LETTER NU - u'\u039e' # 0xCE -> GREEK CAPITAL LETTER XI - u'\u039f' # 0xCF -> GREEK CAPITAL LETTER OMICRON - u'\u03a0' # 0xD0 -> GREEK CAPITAL LETTER PI - u'\u03a1' # 0xD1 -> GREEK CAPITAL LETTER RHO - u'\ufffe' - u'\u03a3' # 0xD3 -> GREEK CAPITAL LETTER SIGMA - u'\u03a4' # 0xD4 -> GREEK CAPITAL LETTER TAU - u'\u03a5' # 0xD5 -> GREEK CAPITAL LETTER UPSILON - u'\u03a6' # 0xD6 -> GREEK CAPITAL LETTER PHI - u'\u03a7' # 0xD7 -> GREEK CAPITAL LETTER CHI - u'\u03a8' # 0xD8 -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xD9 -> GREEK CAPITAL LETTER OMEGA - u'\u03aa' # 0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\u03ab' # 0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03ac' # 0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u03ad' # 0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xDE -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0xDF -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03b0' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA - u'\u03b3' # 0xE3 -> GREEK SMALL LETTER GAMMA - u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON - u'\u03b6' # 0xE6 -> GREEK SMALL LETTER ZETA - u'\u03b7' # 0xE7 -> GREEK SMALL LETTER ETA - u'\u03b8' # 0xE8 -> GREEK SMALL LETTER THETA - u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA - u'\u03ba' # 0xEA -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xEB -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xEC -> GREEK SMALL LETTER MU - u'\u03bd' # 0xED -> GREEK SMALL LETTER NU - u'\u03be' # 0xEE -> GREEK SMALL LETTER XI - u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI - u'\u03c1' # 0xF1 -> GREEK SMALL LETTER RHO - u'\u03c2' # 0xF2 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA - u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU - u'\u03c5' # 0xF5 -> GREEK SMALL LETTER UPSILON - u'\u03c6' # 0xF6 -> GREEK SMALL LETTER PHI - u'\u03c7' # 0xF7 -> GREEK SMALL LETTER CHI - u'\u03c8' # 0xF8 -> GREEK SMALL LETTER PSI - u'\u03c9' # 0xF9 -> GREEK SMALL LETTER OMEGA - u'\u03ca' # 0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u03cc' # 0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u03cd' # 0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03ce' # 0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\ufffe' -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_8.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_8 generated from 'MAPPINGS/ISO8859/8859-8.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-8', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\ufffe' - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xd7' # 0xAA -> MULTIPLICATION SIGN - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xf7' # 0xBA -> DIVISION SIGN - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u2017' # 0xDF -> DOUBLE LOW LINE - u'\u05d0' # 0xE0 -> HEBREW LETTER ALEF - u'\u05d1' # 0xE1 -> HEBREW LETTER BET - u'\u05d2' # 0xE2 -> HEBREW LETTER GIMEL - u'\u05d3' # 0xE3 -> HEBREW LETTER DALET - u'\u05d4' # 0xE4 -> HEBREW LETTER HE - u'\u05d5' # 0xE5 -> HEBREW LETTER VAV - u'\u05d6' # 0xE6 -> HEBREW LETTER ZAYIN - u'\u05d7' # 0xE7 -> HEBREW LETTER HET - u'\u05d8' # 0xE8 -> HEBREW LETTER TET - u'\u05d9' # 0xE9 -> HEBREW LETTER YOD - u'\u05da' # 0xEA -> HEBREW LETTER FINAL KAF - u'\u05db' # 0xEB -> HEBREW LETTER KAF - u'\u05dc' # 0xEC -> HEBREW LETTER LAMED - u'\u05dd' # 0xED -> HEBREW LETTER FINAL MEM - u'\u05de' # 0xEE -> HEBREW LETTER MEM - u'\u05df' # 0xEF -> HEBREW LETTER FINAL NUN - u'\u05e0' # 0xF0 -> HEBREW LETTER NUN - u'\u05e1' # 0xF1 -> HEBREW LETTER SAMEKH - u'\u05e2' # 0xF2 -> HEBREW LETTER AYIN - u'\u05e3' # 0xF3 -> HEBREW LETTER FINAL PE - u'\u05e4' # 0xF4 -> HEBREW LETTER PE - u'\u05e5' # 0xF5 -> HEBREW LETTER FINAL TSADI - u'\u05e6' # 0xF6 -> HEBREW LETTER TSADI - u'\u05e7' # 0xF7 -> HEBREW LETTER QOF - u'\u05e8' # 0xF8 -> HEBREW LETTER RESH - u'\u05e9' # 0xF9 -> HEBREW LETTER SHIN - u'\u05ea' # 0xFA -> HEBREW LETTER TAV - u'\ufffe' - u'\ufffe' - u'\u200e' # 0xFD -> LEFT-TO-RIGHT MARK - u'\u200f' # 0xFE -> RIGHT-TO-LEFT MARK - u'\ufffe' -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/iso8859_9.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec iso8859_9 generated from 'MAPPINGS/ISO8859/8859-9.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-9', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\xa0' # 0xA0 -> NO-BREAK SPACE - u'\xa1' # 0xA1 -> INVERTED EXCLAMATION MARK - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa4' # 0xA4 -> CURRENCY SIGN - u'\xa5' # 0xA5 -> YEN SIGN - u'\xa6' # 0xA6 -> BROKEN BAR - u'\xa7' # 0xA7 -> SECTION SIGN - u'\xa8' # 0xA8 -> DIAERESIS - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\xaa' # 0xAA -> FEMININE ORDINAL INDICATOR - u'\xab' # 0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xac' # 0xAC -> NOT SIGN - u'\xad' # 0xAD -> SOFT HYPHEN - u'\xae' # 0xAE -> REGISTERED SIGN - u'\xaf' # 0xAF -> MACRON - u'\xb0' # 0xB0 -> DEGREE SIGN - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\xb2' # 0xB2 -> SUPERSCRIPT TWO - u'\xb3' # 0xB3 -> SUPERSCRIPT THREE - u'\xb4' # 0xB4 -> ACUTE ACCENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\xb6' # 0xB6 -> PILCROW SIGN - u'\xb7' # 0xB7 -> MIDDLE DOT - u'\xb8' # 0xB8 -> CEDILLA - u'\xb9' # 0xB9 -> SUPERSCRIPT ONE - u'\xba' # 0xBA -> MASCULINE ORDINAL INDICATOR - u'\xbb' # 0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbc' # 0xBC -> VULGAR FRACTION ONE QUARTER - u'\xbd' # 0xBD -> VULGAR FRACTION ONE HALF - u'\xbe' # 0xBE -> VULGAR FRACTION THREE QUARTERS - u'\xbf' # 0xBF -> INVERTED QUESTION MARK - u'\xc0' # 0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc1' # 0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xc2' # 0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xc3' # 0xC3 -> LATIN CAPITAL LETTER A WITH TILDE - u'\xc4' # 0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc6' # 0xC6 -> LATIN CAPITAL LETTER AE - u'\xc7' # 0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc8' # 0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xc9' # 0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xca' # 0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xcb' # 0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xcc' # 0xCC -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xcd' # 0xCD -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\u011e' # 0xD0 -> LATIN CAPITAL LETTER G WITH BREVE - u'\xd1' # 0xD1 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd2' # 0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xd3' # 0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\xd5' # 0xD5 -> LATIN CAPITAL LETTER O WITH TILDE - u'\xd6' # 0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xd7' # 0xD7 -> MULTIPLICATION SIGN - u'\xd8' # 0xD8 -> LATIN CAPITAL LETTER O WITH STROKE - u'\xd9' # 0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\xda' # 0xDA -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xdc' # 0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0130' # 0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\xdf' # 0xDF -> LATIN SMALL LETTER SHARP S - u'\xe0' # 0xE0 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe1' # 0xE1 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe2' # 0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe3' # 0xE3 -> LATIN SMALL LETTER A WITH TILDE - u'\xe4' # 0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe5' # 0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe6' # 0xE6 -> LATIN SMALL LETTER AE - u'\xe7' # 0xE7 -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe8' # 0xE8 -> LATIN SMALL LETTER E WITH GRAVE - u'\xe9' # 0xE9 -> LATIN SMALL LETTER E WITH ACUTE - u'\xea' # 0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0xEB -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xec' # 0xEC -> LATIN SMALL LETTER I WITH GRAVE - u'\xed' # 0xED -> LATIN SMALL LETTER I WITH ACUTE - u'\xee' # 0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0xEF -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u011f' # 0xF0 -> LATIN SMALL LETTER G WITH BREVE - u'\xf1' # 0xF1 -> LATIN SMALL LETTER N WITH TILDE - u'\xf2' # 0xF2 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf3' # 0xF3 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf4' # 0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf5' # 0xF5 -> LATIN SMALL LETTER O WITH TILDE - u'\xf6' # 0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0xF7 -> DIVISION SIGN - u'\xf8' # 0xF8 -> LATIN SMALL LETTER O WITH STROKE - u'\xf9' # 0xF9 -> LATIN SMALL LETTER U WITH GRAVE - u'\xfa' # 0xFA -> LATIN SMALL LETTER U WITH ACUTE - u'\xfb' # 0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0xFC -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u0131' # 0xFD -> LATIN SMALL LETTER DOTLESS I - u'\u015f' # 0xFE -> LATIN SMALL LETTER S WITH CEDILLA - u'\xff' # 0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/johab.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# johab.py: Python Unicode Codec for JOHAB -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_kr, codecs -import _multibytecodec as mbc - -codec = _codecs_kr.getcodec('johab') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='johab', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/koi8_r.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec koi8_r generated from 'MAPPINGS/VENDORS/MISC/KOI8-R.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='koi8-r', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL - u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u2580' # 0x8B -> UPPER HALF BLOCK - u'\u2584' # 0x8C -> LOWER HALF BLOCK - u'\u2588' # 0x8D -> FULL BLOCK - u'\u258c' # 0x8E -> LEFT HALF BLOCK - u'\u2590' # 0x8F -> RIGHT HALF BLOCK - u'\u2591' # 0x90 -> LIGHT SHADE - u'\u2592' # 0x91 -> MEDIUM SHADE - u'\u2593' # 0x92 -> DARK SHADE - u'\u2320' # 0x93 -> TOP HALF INTEGRAL - u'\u25a0' # 0x94 -> BLACK SQUARE - u'\u2219' # 0x95 -> BULLET OPERATOR - u'\u221a' # 0x96 -> SQUARE ROOT - u'\u2248' # 0x97 -> ALMOST EQUAL TO - u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO - u'\xa0' # 0x9A -> NO-BREAK SPACE - u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL - u'\xb0' # 0x9C -> DEGREE SIGN - u'\xb2' # 0x9D -> SUPERSCRIPT TWO - u'\xb7' # 0x9E -> MIDDLE DOT - u'\xf7' # 0x9F -> DIVISION SIGN - u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2552' # 0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO - u'\u2553' # 0xA4 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE - u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u2555' # 0xA6 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE - u'\u2556' # 0xA7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE - u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u255c' # 0xAD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE - u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO - u'\u2562' # 0xB4 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE - u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u2564' # 0xB6 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE - u'\u2565' # 0xB7 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE - u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u256b' # 0xBD -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE - u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa9' # 0xBF -> COPYRIGHT SIGN - u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU - u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE - u'\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE - u'\u0434' # 0xC4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE - u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF - u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE - u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA - u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xCF -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE - u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA - u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U - u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE - u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE - u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU - u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE - u'\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA - u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E - u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA - u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE - u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN - u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU - u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE - u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE - u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE - u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF - u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE - u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA - u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xED -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE - u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA - u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U - u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE - u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU - u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE - u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA - u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E - u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA - u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE - u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/koi8_u.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec koi8_u generated from 'python-mappings/KOI8-U.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='koi8-u', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\u2500' # 0x80 -> BOX DRAWINGS LIGHT HORIZONTAL - u'\u2502' # 0x81 -> BOX DRAWINGS LIGHT VERTICAL - u'\u250c' # 0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT - u'\u2510' # 0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT - u'\u2514' # 0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT - u'\u2518' # 0x85 -> BOX DRAWINGS LIGHT UP AND LEFT - u'\u251c' # 0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT - u'\u2524' # 0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT - u'\u252c' # 0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL - u'\u2534' # 0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL - u'\u253c' # 0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL - u'\u2580' # 0x8B -> UPPER HALF BLOCK - u'\u2584' # 0x8C -> LOWER HALF BLOCK - u'\u2588' # 0x8D -> FULL BLOCK - u'\u258c' # 0x8E -> LEFT HALF BLOCK - u'\u2590' # 0x8F -> RIGHT HALF BLOCK - u'\u2591' # 0x90 -> LIGHT SHADE - u'\u2592' # 0x91 -> MEDIUM SHADE - u'\u2593' # 0x92 -> DARK SHADE - u'\u2320' # 0x93 -> TOP HALF INTEGRAL - u'\u25a0' # 0x94 -> BLACK SQUARE - u'\u2219' # 0x95 -> BULLET OPERATOR - u'\u221a' # 0x96 -> SQUARE ROOT - u'\u2248' # 0x97 -> ALMOST EQUAL TO - u'\u2264' # 0x98 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0x99 -> GREATER-THAN OR EQUAL TO - u'\xa0' # 0x9A -> NO-BREAK SPACE - u'\u2321' # 0x9B -> BOTTOM HALF INTEGRAL - u'\xb0' # 0x9C -> DEGREE SIGN - u'\xb2' # 0x9D -> SUPERSCRIPT TWO - u'\xb7' # 0x9E -> MIDDLE DOT - u'\xf7' # 0x9F -> DIVISION SIGN - u'\u2550' # 0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL - u'\u2551' # 0xA1 -> BOX DRAWINGS DOUBLE VERTICAL - u'\u2552' # 0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE - u'\u0451' # 0xA3 -> CYRILLIC SMALL LETTER IO - u'\u0454' # 0xA4 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u2554' # 0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT - u'\u0456' # 0xA6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0457' # 0xA7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN) - u'\u2557' # 0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT - u'\u2558' # 0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE - u'\u2559' # 0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE - u'\u255a' # 0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT - u'\u255b' # 0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE - u'\u0491' # 0xAD -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN - u'\u255d' # 0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT - u'\u255e' # 0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE - u'\u255f' # 0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE - u'\u2560' # 0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT - u'\u2561' # 0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE - u'\u0401' # 0xB3 -> CYRILLIC CAPITAL LETTER IO - u'\u0404' # 0xB4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u2563' # 0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT - u'\u0406' # 0xB6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\u0407' # 0xB7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN) - u'\u2566' # 0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL - u'\u2567' # 0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE - u'\u2568' # 0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE - u'\u2569' # 0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL - u'\u256a' # 0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE - u'\u0490' # 0xBD -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN - u'\u256c' # 0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL - u'\xa9' # 0xBF -> COPYRIGHT SIGN - u'\u044e' # 0xC0 -> CYRILLIC SMALL LETTER YU - u'\u0430' # 0xC1 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xC2 -> CYRILLIC SMALL LETTER BE - u'\u0446' # 0xC3 -> CYRILLIC SMALL LETTER TSE - u'\u0434' # 0xC4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xC5 -> CYRILLIC SMALL LETTER IE - u'\u0444' # 0xC6 -> CYRILLIC SMALL LETTER EF - u'\u0433' # 0xC7 -> CYRILLIC SMALL LETTER GHE - u'\u0445' # 0xC8 -> CYRILLIC SMALL LETTER HA - u'\u0438' # 0xC9 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xCA -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xCB -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xCC -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xCD -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xCE -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xCF -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xD0 -> CYRILLIC SMALL LETTER PE - u'\u044f' # 0xD1 -> CYRILLIC SMALL LETTER YA - u'\u0440' # 0xD2 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xD3 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xD4 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xD5 -> CYRILLIC SMALL LETTER U - u'\u0436' # 0xD6 -> CYRILLIC SMALL LETTER ZHE - u'\u0432' # 0xD7 -> CYRILLIC SMALL LETTER VE - u'\u044c' # 0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044b' # 0xD9 -> CYRILLIC SMALL LETTER YERU - u'\u0437' # 0xDA -> CYRILLIC SMALL LETTER ZE - u'\u0448' # 0xDB -> CYRILLIC SMALL LETTER SHA - u'\u044d' # 0xDC -> CYRILLIC SMALL LETTER E - u'\u0449' # 0xDD -> CYRILLIC SMALL LETTER SHCHA - u'\u0447' # 0xDE -> CYRILLIC SMALL LETTER CHE - u'\u044a' # 0xDF -> CYRILLIC SMALL LETTER HARD SIGN - u'\u042e' # 0xE0 -> CYRILLIC CAPITAL LETTER YU - u'\u0410' # 0xE1 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0xE2 -> CYRILLIC CAPITAL LETTER BE - u'\u0426' # 0xE3 -> CYRILLIC CAPITAL LETTER TSE - u'\u0414' # 0xE4 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0xE5 -> CYRILLIC CAPITAL LETTER IE - u'\u0424' # 0xE6 -> CYRILLIC CAPITAL LETTER EF - u'\u0413' # 0xE7 -> CYRILLIC CAPITAL LETTER GHE - u'\u0425' # 0xE8 -> CYRILLIC CAPITAL LETTER HA - u'\u0418' # 0xE9 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0xEA -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0xEB -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0xEC -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0xED -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0xEE -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0xEF -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0xF0 -> CYRILLIC CAPITAL LETTER PE - u'\u042f' # 0xF1 -> CYRILLIC CAPITAL LETTER YA - u'\u0420' # 0xF2 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0xF3 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0xF4 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0xF5 -> CYRILLIC CAPITAL LETTER U - u'\u0416' # 0xF6 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0412' # 0xF7 -> CYRILLIC CAPITAL LETTER VE - u'\u042c' # 0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042b' # 0xF9 -> CYRILLIC CAPITAL LETTER YERU - u'\u0417' # 0xFA -> CYRILLIC CAPITAL LETTER ZE - u'\u0428' # 0xFB -> CYRILLIC CAPITAL LETTER SHA - u'\u042d' # 0xFC -> CYRILLIC CAPITAL LETTER E - u'\u0429' # 0xFD -> CYRILLIC CAPITAL LETTER SHCHA - u'\u0427' # 0xFE -> CYRILLIC CAPITAL LETTER CHE - u'\u042a' # 0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/latin_1.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,50 +0,0 @@ -""" Python 'latin-1' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. - encode = codecs.latin_1_encode - decode = codecs.latin_1_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.latin_1_encode(input,self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.latin_1_decode(input,self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -class StreamConverter(StreamWriter,StreamReader): - - encode = codecs.latin_1_decode - decode = codecs.latin_1_encode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='iso8859-1', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/mac_arabic.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,698 +0,0 @@ -""" Python Character Mapping Codec generated from 'VENDORS/APPLE/ARABIC.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-arabic', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x00a0, # NO-BREAK SPACE, right-left - 0x0082: 0x00c7, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x00d1, # LATIN CAPITAL LETTER N WITH TILDE - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x00e0, # LATIN SMALL LETTER A WITH GRAVE - 0x0089: 0x00e2, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x06ba, # ARABIC LETTER NOON GHUNNA - 0x008c: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x008d: 0x00e7, # LATIN SMALL LETTER C WITH CEDILLA - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x00e8, # LATIN SMALL LETTER E WITH GRAVE - 0x0090: 0x00ea, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x0091: 0x00eb, # LATIN SMALL LETTER E WITH DIAERESIS - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x2026, # HORIZONTAL ELLIPSIS, right-left - 0x0094: 0x00ee, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x0095: 0x00ef, # LATIN SMALL LETTER I WITH DIAERESIS - 0x0096: 0x00f1, # LATIN SMALL LETTER N WITH TILDE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f7, # DIVISION SIGN, right-left - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x00f9, # LATIN SMALL LETTER U WITH GRAVE - 0x009e: 0x00fb, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x0020, # SPACE, right-left - 0x00a1: 0x0021, # EXCLAMATION MARK, right-left - 0x00a2: 0x0022, # QUOTATION MARK, right-left - 0x00a3: 0x0023, # NUMBER SIGN, right-left - 0x00a4: 0x0024, # DOLLAR SIGN, right-left - 0x00a5: 0x066a, # ARABIC PERCENT SIGN - 0x00a6: 0x0026, # AMPERSAND, right-left - 0x00a7: 0x0027, # APOSTROPHE, right-left - 0x00a8: 0x0028, # LEFT PARENTHESIS, right-left - 0x00a9: 0x0029, # RIGHT PARENTHESIS, right-left - 0x00aa: 0x002a, # ASTERISK, right-left - 0x00ab: 0x002b, # PLUS SIGN, right-left - 0x00ac: 0x060c, # ARABIC COMMA - 0x00ad: 0x002d, # HYPHEN-MINUS, right-left - 0x00ae: 0x002e, # FULL STOP, right-left - 0x00af: 0x002f, # SOLIDUS, right-left - 0x00b0: 0x0660, # ARABIC-INDIC DIGIT ZERO, right-left (need override) - 0x00b1: 0x0661, # ARABIC-INDIC DIGIT ONE, right-left (need override) - 0x00b2: 0x0662, # ARABIC-INDIC DIGIT TWO, right-left (need override) - 0x00b3: 0x0663, # ARABIC-INDIC DIGIT THREE, right-left (need override) - 0x00b4: 0x0664, # ARABIC-INDIC DIGIT FOUR, right-left (need override) - 0x00b5: 0x0665, # ARABIC-INDIC DIGIT FIVE, right-left (need override) - 0x00b6: 0x0666, # ARABIC-INDIC DIGIT SIX, right-left (need override) - 0x00b7: 0x0667, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) - 0x00b8: 0x0668, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) - 0x00b9: 0x0669, # ARABIC-INDIC DIGIT NINE, right-left (need override) - 0x00ba: 0x003a, # COLON, right-left - 0x00bb: 0x061b, # ARABIC SEMICOLON - 0x00bc: 0x003c, # LESS-THAN SIGN, right-left - 0x00bd: 0x003d, # EQUALS SIGN, right-left - 0x00be: 0x003e, # GREATER-THAN SIGN, right-left - 0x00bf: 0x061f, # ARABIC QUESTION MARK - 0x00c0: 0x274a, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left - 0x00c1: 0x0621, # ARABIC LETTER HAMZA - 0x00c2: 0x0622, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x00c3: 0x0623, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x00c4: 0x0624, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x00c5: 0x0625, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x00c6: 0x0626, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x00c7: 0x0627, # ARABIC LETTER ALEF - 0x00c8: 0x0628, # ARABIC LETTER BEH - 0x00c9: 0x0629, # ARABIC LETTER TEH MARBUTA - 0x00ca: 0x062a, # ARABIC LETTER TEH - 0x00cb: 0x062b, # ARABIC LETTER THEH - 0x00cc: 0x062c, # ARABIC LETTER JEEM - 0x00cd: 0x062d, # ARABIC LETTER HAH - 0x00ce: 0x062e, # ARABIC LETTER KHAH - 0x00cf: 0x062f, # ARABIC LETTER DAL - 0x00d0: 0x0630, # ARABIC LETTER THAL - 0x00d1: 0x0631, # ARABIC LETTER REH - 0x00d2: 0x0632, # ARABIC LETTER ZAIN - 0x00d3: 0x0633, # ARABIC LETTER SEEN - 0x00d4: 0x0634, # ARABIC LETTER SHEEN - 0x00d5: 0x0635, # ARABIC LETTER SAD - 0x00d6: 0x0636, # ARABIC LETTER DAD - 0x00d7: 0x0637, # ARABIC LETTER TAH - 0x00d8: 0x0638, # ARABIC LETTER ZAH - 0x00d9: 0x0639, # ARABIC LETTER AIN - 0x00da: 0x063a, # ARABIC LETTER GHAIN - 0x00db: 0x005b, # LEFT SQUARE BRACKET, right-left - 0x00dc: 0x005c, # REVERSE SOLIDUS, right-left - 0x00dd: 0x005d, # RIGHT SQUARE BRACKET, right-left - 0x00de: 0x005e, # CIRCUMFLEX ACCENT, right-left - 0x00df: 0x005f, # LOW LINE, right-left - 0x00e0: 0x0640, # ARABIC TATWEEL - 0x00e1: 0x0641, # ARABIC LETTER FEH - 0x00e2: 0x0642, # ARABIC LETTER QAF - 0x00e3: 0x0643, # ARABIC LETTER KAF - 0x00e4: 0x0644, # ARABIC LETTER LAM - 0x00e5: 0x0645, # ARABIC LETTER MEEM - 0x00e6: 0x0646, # ARABIC LETTER NOON - 0x00e7: 0x0647, # ARABIC LETTER HEH - 0x00e8: 0x0648, # ARABIC LETTER WAW - 0x00e9: 0x0649, # ARABIC LETTER ALEF MAKSURA - 0x00ea: 0x064a, # ARABIC LETTER YEH - 0x00eb: 0x064b, # ARABIC FATHATAN - 0x00ec: 0x064c, # ARABIC DAMMATAN - 0x00ed: 0x064d, # ARABIC KASRATAN - 0x00ee: 0x064e, # ARABIC FATHA - 0x00ef: 0x064f, # ARABIC DAMMA - 0x00f0: 0x0650, # ARABIC KASRA - 0x00f1: 0x0651, # ARABIC SHADDA - 0x00f2: 0x0652, # ARABIC SUKUN - 0x00f3: 0x067e, # ARABIC LETTER PEH - 0x00f4: 0x0679, # ARABIC LETTER TTEH - 0x00f5: 0x0686, # ARABIC LETTER TCHEH - 0x00f6: 0x06d5, # ARABIC LETTER AE - 0x00f7: 0x06a4, # ARABIC LETTER VEH - 0x00f8: 0x06af, # ARABIC LETTER GAF - 0x00f9: 0x0688, # ARABIC LETTER DDAL - 0x00fa: 0x0691, # ARABIC LETTER RREH - 0x00fb: 0x007b, # LEFT CURLY BRACKET, right-left - 0x00fc: 0x007c, # VERTICAL LINE, right-left - 0x00fd: 0x007d, # RIGHT CURLY BRACKET, right-left - 0x00fe: 0x0698, # ARABIC LETTER JEH - 0x00ff: 0x06d2, # ARABIC LETTER YEH BARREE -}) - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x0000 -> CONTROL CHARACTER - u'\x01' # 0x0001 -> CONTROL CHARACTER - u'\x02' # 0x0002 -> CONTROL CHARACTER - u'\x03' # 0x0003 -> CONTROL CHARACTER - u'\x04' # 0x0004 -> CONTROL CHARACTER - u'\x05' # 0x0005 -> CONTROL CHARACTER - u'\x06' # 0x0006 -> CONTROL CHARACTER - u'\x07' # 0x0007 -> CONTROL CHARACTER - u'\x08' # 0x0008 -> CONTROL CHARACTER - u'\t' # 0x0009 -> CONTROL CHARACTER - u'\n' # 0x000a -> CONTROL CHARACTER - u'\x0b' # 0x000b -> CONTROL CHARACTER - u'\x0c' # 0x000c -> CONTROL CHARACTER - u'\r' # 0x000d -> CONTROL CHARACTER - u'\x0e' # 0x000e -> CONTROL CHARACTER - u'\x0f' # 0x000f -> CONTROL CHARACTER - u'\x10' # 0x0010 -> CONTROL CHARACTER - u'\x11' # 0x0011 -> CONTROL CHARACTER - u'\x12' # 0x0012 -> CONTROL CHARACTER - u'\x13' # 0x0013 -> CONTROL CHARACTER - u'\x14' # 0x0014 -> CONTROL CHARACTER - u'\x15' # 0x0015 -> CONTROL CHARACTER - u'\x16' # 0x0016 -> CONTROL CHARACTER - u'\x17' # 0x0017 -> CONTROL CHARACTER - u'\x18' # 0x0018 -> CONTROL CHARACTER - u'\x19' # 0x0019 -> CONTROL CHARACTER - u'\x1a' # 0x001a -> CONTROL CHARACTER - u'\x1b' # 0x001b -> CONTROL CHARACTER - u'\x1c' # 0x001c -> CONTROL CHARACTER - u'\x1d' # 0x001d -> CONTROL CHARACTER - u'\x1e' # 0x001e -> CONTROL CHARACTER - u'\x1f' # 0x001f -> CONTROL CHARACTER - u' ' # 0x0020 -> SPACE, left-right - u'!' # 0x0021 -> EXCLAMATION MARK, left-right - u'"' # 0x0022 -> QUOTATION MARK, left-right - u'#' # 0x0023 -> NUMBER SIGN, left-right - u'$' # 0x0024 -> DOLLAR SIGN, left-right - u'%' # 0x0025 -> PERCENT SIGN, left-right - u'&' # 0x0026 -> AMPERSAND, left-right - u"'" # 0x0027 -> APOSTROPHE, left-right - u'(' # 0x0028 -> LEFT PARENTHESIS, left-right - u')' # 0x0029 -> RIGHT PARENTHESIS, left-right - u'*' # 0x002a -> ASTERISK, left-right - u'+' # 0x002b -> PLUS SIGN, left-right - u',' # 0x002c -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - u'-' # 0x002d -> HYPHEN-MINUS, left-right - u'.' # 0x002e -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - u'/' # 0x002f -> SOLIDUS, left-right - u'0' # 0x0030 -> DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO - u'1' # 0x0031 -> DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE - u'2' # 0x0032 -> DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO - u'3' # 0x0033 -> DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE - u'4' # 0x0034 -> DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR - u'5' # 0x0035 -> DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE - u'6' # 0x0036 -> DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX - u'7' # 0x0037 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN - u'8' # 0x0038 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT - u'9' # 0x0039 -> DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE - u':' # 0x003a -> COLON, left-right - u';' # 0x003b -> SEMICOLON, left-right - u'<' # 0x003c -> LESS-THAN SIGN, left-right - u'=' # 0x003d -> EQUALS SIGN, left-right - u'>' # 0x003e -> GREATER-THAN SIGN, left-right - u'?' # 0x003f -> QUESTION MARK, left-right - u'@' # 0x0040 -> COMMERCIAL AT - u'A' # 0x0041 -> LATIN CAPITAL LETTER A - u'B' # 0x0042 -> LATIN CAPITAL LETTER B - u'C' # 0x0043 -> LATIN CAPITAL LETTER C - u'D' # 0x0044 -> LATIN CAPITAL LETTER D - u'E' # 0x0045 -> LATIN CAPITAL LETTER E - u'F' # 0x0046 -> LATIN CAPITAL LETTER F - u'G' # 0x0047 -> LATIN CAPITAL LETTER G - u'H' # 0x0048 -> LATIN CAPITAL LETTER H - u'I' # 0x0049 -> LATIN CAPITAL LETTER I - u'J' # 0x004a -> LATIN CAPITAL LETTER J - u'K' # 0x004b -> LATIN CAPITAL LETTER K - u'L' # 0x004c -> LATIN CAPITAL LETTER L - u'M' # 0x004d -> LATIN CAPITAL LETTER M - u'N' # 0x004e -> LATIN CAPITAL LETTER N - u'O' # 0x004f -> LATIN CAPITAL LETTER O - u'P' # 0x0050 -> LATIN CAPITAL LETTER P - u'Q' # 0x0051 -> LATIN CAPITAL LETTER Q - u'R' # 0x0052 -> LATIN CAPITAL LETTER R - u'S' # 0x0053 -> LATIN CAPITAL LETTER S - u'T' # 0x0054 -> LATIN CAPITAL LETTER T - u'U' # 0x0055 -> LATIN CAPITAL LETTER U - u'V' # 0x0056 -> LATIN CAPITAL LETTER V - u'W' # 0x0057 -> LATIN CAPITAL LETTER W - u'X' # 0x0058 -> LATIN CAPITAL LETTER X - u'Y' # 0x0059 -> LATIN CAPITAL LETTER Y - u'Z' # 0x005a -> LATIN CAPITAL LETTER Z - u'[' # 0x005b -> LEFT SQUARE BRACKET, left-right - u'\\' # 0x005c -> REVERSE SOLIDUS, left-right - u']' # 0x005d -> RIGHT SQUARE BRACKET, left-right - u'^' # 0x005e -> CIRCUMFLEX ACCENT, left-right - u'_' # 0x005f -> LOW LINE, left-right - u'`' # 0x0060 -> GRAVE ACCENT - u'a' # 0x0061 -> LATIN SMALL LETTER A - u'b' # 0x0062 -> LATIN SMALL LETTER B - u'c' # 0x0063 -> LATIN SMALL LETTER C - u'd' # 0x0064 -> LATIN SMALL LETTER D - u'e' # 0x0065 -> LATIN SMALL LETTER E - u'f' # 0x0066 -> LATIN SMALL LETTER F - u'g' # 0x0067 -> LATIN SMALL LETTER G - u'h' # 0x0068 -> LATIN SMALL LETTER H - u'i' # 0x0069 -> LATIN SMALL LETTER I - u'j' # 0x006a -> LATIN SMALL LETTER J - u'k' # 0x006b -> LATIN SMALL LETTER K - u'l' # 0x006c -> LATIN SMALL LETTER L - u'm' # 0x006d -> LATIN SMALL LETTER M - u'n' # 0x006e -> LATIN SMALL LETTER N - u'o' # 0x006f -> LATIN SMALL LETTER O - u'p' # 0x0070 -> LATIN SMALL LETTER P - u'q' # 0x0071 -> LATIN SMALL LETTER Q - u'r' # 0x0072 -> LATIN SMALL LETTER R - u's' # 0x0073 -> LATIN SMALL LETTER S - u't' # 0x0074 -> LATIN SMALL LETTER T - u'u' # 0x0075 -> LATIN SMALL LETTER U - u'v' # 0x0076 -> LATIN SMALL LETTER V - u'w' # 0x0077 -> LATIN SMALL LETTER W - u'x' # 0x0078 -> LATIN SMALL LETTER X - u'y' # 0x0079 -> LATIN SMALL LETTER Y - u'z' # 0x007a -> LATIN SMALL LETTER Z - u'{' # 0x007b -> LEFT CURLY BRACKET, left-right - u'|' # 0x007c -> VERTICAL LINE, left-right - u'}' # 0x007d -> RIGHT CURLY BRACKET, left-right - u'~' # 0x007e -> TILDE - u'\x7f' # 0x007f -> CONTROL CHARACTER - u'\xc4' # 0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xa0' # 0x0081 -> NO-BREAK SPACE, right-left - u'\xc7' # 0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x0084 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x0087 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x0088 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x008a -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u06ba' # 0x008b -> ARABIC LETTER NOON GHUNNA - u'\xab' # 0x008c -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xe7' # 0x008d -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x008e -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x008f -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x0092 -> LATIN SMALL LETTER I WITH ACUTE - u'\u2026' # 0x0093 -> HORIZONTAL ELLIPSIS, right-left - u'\xee' # 0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x0096 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x0097 -> LATIN SMALL LETTER O WITH ACUTE - u'\xbb' # 0x0098 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xf4' # 0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x009a -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0x009b -> DIVISION SIGN, right-left - u'\xfa' # 0x009c -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x009d -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x009f -> LATIN SMALL LETTER U WITH DIAERESIS - u' ' # 0x00a0 -> SPACE, right-left - u'!' # 0x00a1 -> EXCLAMATION MARK, right-left - u'"' # 0x00a2 -> QUOTATION MARK, right-left - u'#' # 0x00a3 -> NUMBER SIGN, right-left - u'$' # 0x00a4 -> DOLLAR SIGN, right-left - u'\u066a' # 0x00a5 -> ARABIC PERCENT SIGN - u'&' # 0x00a6 -> AMPERSAND, right-left - u"'" # 0x00a7 -> APOSTROPHE, right-left - u'(' # 0x00a8 -> LEFT PARENTHESIS, right-left - u')' # 0x00a9 -> RIGHT PARENTHESIS, right-left - u'*' # 0x00aa -> ASTERISK, right-left - u'+' # 0x00ab -> PLUS SIGN, right-left - u'\u060c' # 0x00ac -> ARABIC COMMA - u'-' # 0x00ad -> HYPHEN-MINUS, right-left - u'.' # 0x00ae -> FULL STOP, right-left - u'/' # 0x00af -> SOLIDUS, right-left - u'\u0660' # 0x00b0 -> ARABIC-INDIC DIGIT ZERO, right-left (need override) - u'\u0661' # 0x00b1 -> ARABIC-INDIC DIGIT ONE, right-left (need override) - u'\u0662' # 0x00b2 -> ARABIC-INDIC DIGIT TWO, right-left (need override) - u'\u0663' # 0x00b3 -> ARABIC-INDIC DIGIT THREE, right-left (need override) - u'\u0664' # 0x00b4 -> ARABIC-INDIC DIGIT FOUR, right-left (need override) - u'\u0665' # 0x00b5 -> ARABIC-INDIC DIGIT FIVE, right-left (need override) - u'\u0666' # 0x00b6 -> ARABIC-INDIC DIGIT SIX, right-left (need override) - u'\u0667' # 0x00b7 -> ARABIC-INDIC DIGIT SEVEN, right-left (need override) - u'\u0668' # 0x00b8 -> ARABIC-INDIC DIGIT EIGHT, right-left (need override) - u'\u0669' # 0x00b9 -> ARABIC-INDIC DIGIT NINE, right-left (need override) - u':' # 0x00ba -> COLON, right-left - u'\u061b' # 0x00bb -> ARABIC SEMICOLON - u'<' # 0x00bc -> LESS-THAN SIGN, right-left - u'=' # 0x00bd -> EQUALS SIGN, right-left - u'>' # 0x00be -> GREATER-THAN SIGN, right-left - u'\u061f' # 0x00bf -> ARABIC QUESTION MARK - u'\u274a' # 0x00c0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left - u'\u0621' # 0x00c1 -> ARABIC LETTER HAMZA - u'\u0622' # 0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0x00c7 -> ARABIC LETTER ALEF - u'\u0628' # 0x00c8 -> ARABIC LETTER BEH - u'\u0629' # 0x00c9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0x00ca -> ARABIC LETTER TEH - u'\u062b' # 0x00cb -> ARABIC LETTER THEH - u'\u062c' # 0x00cc -> ARABIC LETTER JEEM - u'\u062d' # 0x00cd -> ARABIC LETTER HAH - u'\u062e' # 0x00ce -> ARABIC LETTER KHAH - u'\u062f' # 0x00cf -> ARABIC LETTER DAL - u'\u0630' # 0x00d0 -> ARABIC LETTER THAL - u'\u0631' # 0x00d1 -> ARABIC LETTER REH - u'\u0632' # 0x00d2 -> ARABIC LETTER ZAIN - u'\u0633' # 0x00d3 -> ARABIC LETTER SEEN - u'\u0634' # 0x00d4 -> ARABIC LETTER SHEEN - u'\u0635' # 0x00d5 -> ARABIC LETTER SAD - u'\u0636' # 0x00d6 -> ARABIC LETTER DAD - u'\u0637' # 0x00d7 -> ARABIC LETTER TAH - u'\u0638' # 0x00d8 -> ARABIC LETTER ZAH - u'\u0639' # 0x00d9 -> ARABIC LETTER AIN - u'\u063a' # 0x00da -> ARABIC LETTER GHAIN - u'[' # 0x00db -> LEFT SQUARE BRACKET, right-left - u'\\' # 0x00dc -> REVERSE SOLIDUS, right-left - u']' # 0x00dd -> RIGHT SQUARE BRACKET, right-left - u'^' # 0x00de -> CIRCUMFLEX ACCENT, right-left - u'_' # 0x00df -> LOW LINE, right-left - u'\u0640' # 0x00e0 -> ARABIC TATWEEL - u'\u0641' # 0x00e1 -> ARABIC LETTER FEH - u'\u0642' # 0x00e2 -> ARABIC LETTER QAF - u'\u0643' # 0x00e3 -> ARABIC LETTER KAF - u'\u0644' # 0x00e4 -> ARABIC LETTER LAM - u'\u0645' # 0x00e5 -> ARABIC LETTER MEEM - u'\u0646' # 0x00e6 -> ARABIC LETTER NOON - u'\u0647' # 0x00e7 -> ARABIC LETTER HEH - u'\u0648' # 0x00e8 -> ARABIC LETTER WAW - u'\u0649' # 0x00e9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0x00ea -> ARABIC LETTER YEH - u'\u064b' # 0x00eb -> ARABIC FATHATAN - u'\u064c' # 0x00ec -> ARABIC DAMMATAN - u'\u064d' # 0x00ed -> ARABIC KASRATAN - u'\u064e' # 0x00ee -> ARABIC FATHA - u'\u064f' # 0x00ef -> ARABIC DAMMA - u'\u0650' # 0x00f0 -> ARABIC KASRA - u'\u0651' # 0x00f1 -> ARABIC SHADDA - u'\u0652' # 0x00f2 -> ARABIC SUKUN - u'\u067e' # 0x00f3 -> ARABIC LETTER PEH - u'\u0679' # 0x00f4 -> ARABIC LETTER TTEH - u'\u0686' # 0x00f5 -> ARABIC LETTER TCHEH - u'\u06d5' # 0x00f6 -> ARABIC LETTER AE - u'\u06a4' # 0x00f7 -> ARABIC LETTER VEH - u'\u06af' # 0x00f8 -> ARABIC LETTER GAF - u'\u0688' # 0x00f9 -> ARABIC LETTER DDAL - u'\u0691' # 0x00fa -> ARABIC LETTER RREH - u'{' # 0x00fb -> LEFT CURLY BRACKET, right-left - u'|' # 0x00fc -> VERTICAL LINE, right-left - u'}' # 0x00fd -> RIGHT CURLY BRACKET, right-left - u'\u0698' # 0x00fe -> ARABIC LETTER JEH - u'\u06d2' # 0x00ff -> ARABIC LETTER YEH BARREE -) - -### Encoding Map - -encoding_map = { - 0x0000: 0x0000, # CONTROL CHARACTER - 0x0001: 0x0001, # CONTROL CHARACTER - 0x0002: 0x0002, # CONTROL CHARACTER - 0x0003: 0x0003, # CONTROL CHARACTER - 0x0004: 0x0004, # CONTROL CHARACTER - 0x0005: 0x0005, # CONTROL CHARACTER - 0x0006: 0x0006, # CONTROL CHARACTER - 0x0007: 0x0007, # CONTROL CHARACTER - 0x0008: 0x0008, # CONTROL CHARACTER - 0x0009: 0x0009, # CONTROL CHARACTER - 0x000a: 0x000a, # CONTROL CHARACTER - 0x000b: 0x000b, # CONTROL CHARACTER - 0x000c: 0x000c, # CONTROL CHARACTER - 0x000d: 0x000d, # CONTROL CHARACTER - 0x000e: 0x000e, # CONTROL CHARACTER - 0x000f: 0x000f, # CONTROL CHARACTER - 0x0010: 0x0010, # CONTROL CHARACTER - 0x0011: 0x0011, # CONTROL CHARACTER - 0x0012: 0x0012, # CONTROL CHARACTER - 0x0013: 0x0013, # CONTROL CHARACTER - 0x0014: 0x0014, # CONTROL CHARACTER - 0x0015: 0x0015, # CONTROL CHARACTER - 0x0016: 0x0016, # CONTROL CHARACTER - 0x0017: 0x0017, # CONTROL CHARACTER - 0x0018: 0x0018, # CONTROL CHARACTER - 0x0019: 0x0019, # CONTROL CHARACTER - 0x001a: 0x001a, # CONTROL CHARACTER - 0x001b: 0x001b, # CONTROL CHARACTER - 0x001c: 0x001c, # CONTROL CHARACTER - 0x001d: 0x001d, # CONTROL CHARACTER - 0x001e: 0x001e, # CONTROL CHARACTER - 0x001f: 0x001f, # CONTROL CHARACTER - 0x0020: 0x0020, # SPACE, left-right - 0x0020: 0x00a0, # SPACE, right-left - 0x0021: 0x0021, # EXCLAMATION MARK, left-right - 0x0021: 0x00a1, # EXCLAMATION MARK, right-left - 0x0022: 0x0022, # QUOTATION MARK, left-right - 0x0022: 0x00a2, # QUOTATION MARK, right-left - 0x0023: 0x0023, # NUMBER SIGN, left-right - 0x0023: 0x00a3, # NUMBER SIGN, right-left - 0x0024: 0x0024, # DOLLAR SIGN, left-right - 0x0024: 0x00a4, # DOLLAR SIGN, right-left - 0x0025: 0x0025, # PERCENT SIGN, left-right - 0x0026: 0x0026, # AMPERSAND, left-right - 0x0026: 0x00a6, # AMPERSAND, right-left - 0x0027: 0x0027, # APOSTROPHE, left-right - 0x0027: 0x00a7, # APOSTROPHE, right-left - 0x0028: 0x0028, # LEFT PARENTHESIS, left-right - 0x0028: 0x00a8, # LEFT PARENTHESIS, right-left - 0x0029: 0x0029, # RIGHT PARENTHESIS, left-right - 0x0029: 0x00a9, # RIGHT PARENTHESIS, right-left - 0x002a: 0x002a, # ASTERISK, left-right - 0x002a: 0x00aa, # ASTERISK, right-left - 0x002b: 0x002b, # PLUS SIGN, left-right - 0x002b: 0x00ab, # PLUS SIGN, right-left - 0x002c: 0x002c, # COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - 0x002d: 0x002d, # HYPHEN-MINUS, left-right - 0x002d: 0x00ad, # HYPHEN-MINUS, right-left - 0x002e: 0x002e, # FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - 0x002e: 0x00ae, # FULL STOP, right-left - 0x002f: 0x002f, # SOLIDUS, left-right - 0x002f: 0x00af, # SOLIDUS, right-left - 0x0030: 0x0030, # DIGIT ZERO; in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO - 0x0031: 0x0031, # DIGIT ONE; in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE - 0x0032: 0x0032, # DIGIT TWO; in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO - 0x0033: 0x0033, # DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE - 0x0034: 0x0034, # DIGIT FOUR; in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR - 0x0035: 0x0035, # DIGIT FIVE; in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE - 0x0036: 0x0036, # DIGIT SIX; in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX - 0x0037: 0x0037, # DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN - 0x0038: 0x0038, # DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT - 0x0039: 0x0039, # DIGIT NINE; in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE - 0x003a: 0x003a, # COLON, left-right - 0x003a: 0x00ba, # COLON, right-left - 0x003b: 0x003b, # SEMICOLON, left-right - 0x003c: 0x003c, # LESS-THAN SIGN, left-right - 0x003c: 0x00bc, # LESS-THAN SIGN, right-left - 0x003d: 0x003d, # EQUALS SIGN, left-right - 0x003d: 0x00bd, # EQUALS SIGN, right-left - 0x003e: 0x003e, # GREATER-THAN SIGN, left-right - 0x003e: 0x00be, # GREATER-THAN SIGN, right-left - 0x003f: 0x003f, # QUESTION MARK, left-right - 0x0040: 0x0040, # COMMERCIAL AT - 0x0041: 0x0041, # LATIN CAPITAL LETTER A - 0x0042: 0x0042, # LATIN CAPITAL LETTER B - 0x0043: 0x0043, # LATIN CAPITAL LETTER C - 0x0044: 0x0044, # LATIN CAPITAL LETTER D - 0x0045: 0x0045, # LATIN CAPITAL LETTER E - 0x0046: 0x0046, # LATIN CAPITAL LETTER F - 0x0047: 0x0047, # LATIN CAPITAL LETTER G - 0x0048: 0x0048, # LATIN CAPITAL LETTER H - 0x0049: 0x0049, # LATIN CAPITAL LETTER I - 0x004a: 0x004a, # LATIN CAPITAL LETTER J - 0x004b: 0x004b, # LATIN CAPITAL LETTER K - 0x004c: 0x004c, # LATIN CAPITAL LETTER L - 0x004d: 0x004d, # LATIN CAPITAL LETTER M - 0x004e: 0x004e, # LATIN CAPITAL LETTER N - 0x004f: 0x004f, # LATIN CAPITAL LETTER O - 0x0050: 0x0050, # LATIN CAPITAL LETTER P - 0x0051: 0x0051, # LATIN CAPITAL LETTER Q - 0x0052: 0x0052, # LATIN CAPITAL LETTER R - 0x0053: 0x0053, # LATIN CAPITAL LETTER S - 0x0054: 0x0054, # LATIN CAPITAL LETTER T - 0x0055: 0x0055, # LATIN CAPITAL LETTER U - 0x0056: 0x0056, # LATIN CAPITAL LETTER V - 0x0057: 0x0057, # LATIN CAPITAL LETTER W - 0x0058: 0x0058, # LATIN CAPITAL LETTER X - 0x0059: 0x0059, # LATIN CAPITAL LETTER Y - 0x005a: 0x005a, # LATIN CAPITAL LETTER Z - 0x005b: 0x005b, # LEFT SQUARE BRACKET, left-right - 0x005b: 0x00db, # LEFT SQUARE BRACKET, right-left - 0x005c: 0x005c, # REVERSE SOLIDUS, left-right - 0x005c: 0x00dc, # REVERSE SOLIDUS, right-left - 0x005d: 0x005d, # RIGHT SQUARE BRACKET, left-right - 0x005d: 0x00dd, # RIGHT SQUARE BRACKET, right-left - 0x005e: 0x005e, # CIRCUMFLEX ACCENT, left-right - 0x005e: 0x00de, # CIRCUMFLEX ACCENT, right-left - 0x005f: 0x005f, # LOW LINE, left-right - 0x005f: 0x00df, # LOW LINE, right-left - 0x0060: 0x0060, # GRAVE ACCENT - 0x0061: 0x0061, # LATIN SMALL LETTER A - 0x0062: 0x0062, # LATIN SMALL LETTER B - 0x0063: 0x0063, # LATIN SMALL LETTER C - 0x0064: 0x0064, # LATIN SMALL LETTER D - 0x0065: 0x0065, # LATIN SMALL LETTER E - 0x0066: 0x0066, # LATIN SMALL LETTER F - 0x0067: 0x0067, # LATIN SMALL LETTER G - 0x0068: 0x0068, # LATIN SMALL LETTER H - 0x0069: 0x0069, # LATIN SMALL LETTER I - 0x006a: 0x006a, # LATIN SMALL LETTER J - 0x006b: 0x006b, # LATIN SMALL LETTER K - 0x006c: 0x006c, # LATIN SMALL LETTER L - 0x006d: 0x006d, # LATIN SMALL LETTER M - 0x006e: 0x006e, # LATIN SMALL LETTER N - 0x006f: 0x006f, # LATIN SMALL LETTER O - 0x0070: 0x0070, # LATIN SMALL LETTER P - 0x0071: 0x0071, # LATIN SMALL LETTER Q - 0x0072: 0x0072, # LATIN SMALL LETTER R - 0x0073: 0x0073, # LATIN SMALL LETTER S - 0x0074: 0x0074, # LATIN SMALL LETTER T - 0x0075: 0x0075, # LATIN SMALL LETTER U - 0x0076: 0x0076, # LATIN SMALL LETTER V - 0x0077: 0x0077, # LATIN SMALL LETTER W - 0x0078: 0x0078, # LATIN SMALL LETTER X - 0x0079: 0x0079, # LATIN SMALL LETTER Y - 0x007a: 0x007a, # LATIN SMALL LETTER Z - 0x007b: 0x007b, # LEFT CURLY BRACKET, left-right - 0x007b: 0x00fb, # LEFT CURLY BRACKET, right-left - 0x007c: 0x007c, # VERTICAL LINE, left-right - 0x007c: 0x00fc, # VERTICAL LINE, right-left - 0x007d: 0x007d, # RIGHT CURLY BRACKET, left-right - 0x007d: 0x00fd, # RIGHT CURLY BRACKET, right-left - 0x007e: 0x007e, # TILDE - 0x007f: 0x007f, # CONTROL CHARACTER - 0x00a0: 0x0081, # NO-BREAK SPACE, right-left - 0x00ab: 0x008c, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00bb: 0x0098, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - 0x00c4: 0x0080, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x00c7: 0x0082, # LATIN CAPITAL LETTER C WITH CEDILLA - 0x00c9: 0x0083, # LATIN CAPITAL LETTER E WITH ACUTE - 0x00d1: 0x0084, # LATIN CAPITAL LETTER N WITH TILDE - 0x00d6: 0x0085, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x00dc: 0x0086, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x00e0: 0x0088, # LATIN SMALL LETTER A WITH GRAVE - 0x00e1: 0x0087, # LATIN SMALL LETTER A WITH ACUTE - 0x00e2: 0x0089, # LATIN SMALL LETTER A WITH CIRCUMFLEX - 0x00e4: 0x008a, # LATIN SMALL LETTER A WITH DIAERESIS - 0x00e7: 0x008d, # LATIN SMALL LETTER C WITH CEDILLA - 0x00e8: 0x008f, # LATIN SMALL LETTER E WITH GRAVE - 0x00e9: 0x008e, # LATIN SMALL LETTER E WITH ACUTE - 0x00ea: 0x0090, # LATIN SMALL LETTER E WITH CIRCUMFLEX - 0x00eb: 0x0091, # LATIN SMALL LETTER E WITH DIAERESIS - 0x00ed: 0x0092, # LATIN SMALL LETTER I WITH ACUTE - 0x00ee: 0x0094, # LATIN SMALL LETTER I WITH CIRCUMFLEX - 0x00ef: 0x0095, # LATIN SMALL LETTER I WITH DIAERESIS - 0x00f1: 0x0096, # LATIN SMALL LETTER N WITH TILDE - 0x00f3: 0x0097, # LATIN SMALL LETTER O WITH ACUTE - 0x00f4: 0x0099, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x00f6: 0x009a, # LATIN SMALL LETTER O WITH DIAERESIS - 0x00f7: 0x009b, # DIVISION SIGN, right-left - 0x00f9: 0x009d, # LATIN SMALL LETTER U WITH GRAVE - 0x00fa: 0x009c, # LATIN SMALL LETTER U WITH ACUTE - 0x00fb: 0x009e, # LATIN SMALL LETTER U WITH CIRCUMFLEX - 0x00fc: 0x009f, # LATIN SMALL LETTER U WITH DIAERESIS - 0x060c: 0x00ac, # ARABIC COMMA - 0x061b: 0x00bb, # ARABIC SEMICOLON - 0x061f: 0x00bf, # ARABIC QUESTION MARK - 0x0621: 0x00c1, # ARABIC LETTER HAMZA - 0x0622: 0x00c2, # ARABIC LETTER ALEF WITH MADDA ABOVE - 0x0623: 0x00c3, # ARABIC LETTER ALEF WITH HAMZA ABOVE - 0x0624: 0x00c4, # ARABIC LETTER WAW WITH HAMZA ABOVE - 0x0625: 0x00c5, # ARABIC LETTER ALEF WITH HAMZA BELOW - 0x0626: 0x00c6, # ARABIC LETTER YEH WITH HAMZA ABOVE - 0x0627: 0x00c7, # ARABIC LETTER ALEF - 0x0628: 0x00c8, # ARABIC LETTER BEH - 0x0629: 0x00c9, # ARABIC LETTER TEH MARBUTA - 0x062a: 0x00ca, # ARABIC LETTER TEH - 0x062b: 0x00cb, # ARABIC LETTER THEH - 0x062c: 0x00cc, # ARABIC LETTER JEEM - 0x062d: 0x00cd, # ARABIC LETTER HAH - 0x062e: 0x00ce, # ARABIC LETTER KHAH - 0x062f: 0x00cf, # ARABIC LETTER DAL - 0x0630: 0x00d0, # ARABIC LETTER THAL - 0x0631: 0x00d1, # ARABIC LETTER REH - 0x0632: 0x00d2, # ARABIC LETTER ZAIN - 0x0633: 0x00d3, # ARABIC LETTER SEEN - 0x0634: 0x00d4, # ARABIC LETTER SHEEN - 0x0635: 0x00d5, # ARABIC LETTER SAD - 0x0636: 0x00d6, # ARABIC LETTER DAD - 0x0637: 0x00d7, # ARABIC LETTER TAH - 0x0638: 0x00d8, # ARABIC LETTER ZAH - 0x0639: 0x00d9, # ARABIC LETTER AIN - 0x063a: 0x00da, # ARABIC LETTER GHAIN - 0x0640: 0x00e0, # ARABIC TATWEEL - 0x0641: 0x00e1, # ARABIC LETTER FEH - 0x0642: 0x00e2, # ARABIC LETTER QAF - 0x0643: 0x00e3, # ARABIC LETTER KAF - 0x0644: 0x00e4, # ARABIC LETTER LAM - 0x0645: 0x00e5, # ARABIC LETTER MEEM - 0x0646: 0x00e6, # ARABIC LETTER NOON - 0x0647: 0x00e7, # ARABIC LETTER HEH - 0x0648: 0x00e8, # ARABIC LETTER WAW - 0x0649: 0x00e9, # ARABIC LETTER ALEF MAKSURA - 0x064a: 0x00ea, # ARABIC LETTER YEH - 0x064b: 0x00eb, # ARABIC FATHATAN - 0x064c: 0x00ec, # ARABIC DAMMATAN - 0x064d: 0x00ed, # ARABIC KASRATAN - 0x064e: 0x00ee, # ARABIC FATHA - 0x064f: 0x00ef, # ARABIC DAMMA - 0x0650: 0x00f0, # ARABIC KASRA - 0x0651: 0x00f1, # ARABIC SHADDA - 0x0652: 0x00f2, # ARABIC SUKUN - 0x0660: 0x00b0, # ARABIC-INDIC DIGIT ZERO, right-left (need override) - 0x0661: 0x00b1, # ARABIC-INDIC DIGIT ONE, right-left (need override) - 0x0662: 0x00b2, # ARABIC-INDIC DIGIT TWO, right-left (need override) - 0x0663: 0x00b3, # ARABIC-INDIC DIGIT THREE, right-left (need override) - 0x0664: 0x00b4, # ARABIC-INDIC DIGIT FOUR, right-left (need override) - 0x0665: 0x00b5, # ARABIC-INDIC DIGIT FIVE, right-left (need override) - 0x0666: 0x00b6, # ARABIC-INDIC DIGIT SIX, right-left (need override) - 0x0667: 0x00b7, # ARABIC-INDIC DIGIT SEVEN, right-left (need override) - 0x0668: 0x00b8, # ARABIC-INDIC DIGIT EIGHT, right-left (need override) - 0x0669: 0x00b9, # ARABIC-INDIC DIGIT NINE, right-left (need override) - 0x066a: 0x00a5, # ARABIC PERCENT SIGN - 0x0679: 0x00f4, # ARABIC LETTER TTEH - 0x067e: 0x00f3, # ARABIC LETTER PEH - 0x0686: 0x00f5, # ARABIC LETTER TCHEH - 0x0688: 0x00f9, # ARABIC LETTER DDAL - 0x0691: 0x00fa, # ARABIC LETTER RREH - 0x0698: 0x00fe, # ARABIC LETTER JEH - 0x06a4: 0x00f7, # ARABIC LETTER VEH - 0x06af: 0x00f8, # ARABIC LETTER GAF - 0x06ba: 0x008b, # ARABIC LETTER NOON GHUNNA - 0x06d2: 0x00ff, # ARABIC LETTER YEH BARREE - 0x06d5: 0x00f6, # ARABIC LETTER AE - 0x2026: 0x0093, # HORIZONTAL ELLIPSIS, right-left - 0x274a: 0x00c0, # EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left -}
--- a/test/lib/python2.7/encodings/mac_centeuro.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_centeuro generated from 'MAPPINGS/VENDORS/APPLE/CENTEURO.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-centeuro', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\u0100' # 0x81 -> LATIN CAPITAL LETTER A WITH MACRON - u'\u0101' # 0x82 -> LATIN SMALL LETTER A WITH MACRON - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\u0104' # 0x84 -> LATIN CAPITAL LETTER A WITH OGONEK - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\u0105' # 0x88 -> LATIN SMALL LETTER A WITH OGONEK - u'\u010c' # 0x89 -> LATIN CAPITAL LETTER C WITH CARON - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u010d' # 0x8B -> LATIN SMALL LETTER C WITH CARON - u'\u0106' # 0x8C -> LATIN CAPITAL LETTER C WITH ACUTE - u'\u0107' # 0x8D -> LATIN SMALL LETTER C WITH ACUTE - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\u0179' # 0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE - u'\u017a' # 0x90 -> LATIN SMALL LETTER Z WITH ACUTE - u'\u010e' # 0x91 -> LATIN CAPITAL LETTER D WITH CARON - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\u010f' # 0x93 -> LATIN SMALL LETTER D WITH CARON - u'\u0112' # 0x94 -> LATIN CAPITAL LETTER E WITH MACRON - u'\u0113' # 0x95 -> LATIN SMALL LETTER E WITH MACRON - u'\u0116' # 0x96 -> LATIN CAPITAL LETTER E WITH DOT ABOVE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\u0117' # 0x98 -> LATIN SMALL LETTER E WITH DOT ABOVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\u011a' # 0x9D -> LATIN CAPITAL LETTER E WITH CARON - u'\u011b' # 0x9E -> LATIN SMALL LETTER E WITH CARON - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\u0118' # 0xA2 -> LATIN CAPITAL LETTER E WITH OGONEK - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\u0119' # 0xAB -> LATIN SMALL LETTER E WITH OGONEK - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u0123' # 0xAE -> LATIN SMALL LETTER G WITH CEDILLA - u'\u012e' # 0xAF -> LATIN CAPITAL LETTER I WITH OGONEK - u'\u012f' # 0xB0 -> LATIN SMALL LETTER I WITH OGONEK - u'\u012a' # 0xB1 -> LATIN CAPITAL LETTER I WITH MACRON - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\u012b' # 0xB4 -> LATIN SMALL LETTER I WITH MACRON - u'\u0136' # 0xB5 -> LATIN CAPITAL LETTER K WITH CEDILLA - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u0142' # 0xB8 -> LATIN SMALL LETTER L WITH STROKE - u'\u013b' # 0xB9 -> LATIN CAPITAL LETTER L WITH CEDILLA - u'\u013c' # 0xBA -> LATIN SMALL LETTER L WITH CEDILLA - u'\u013d' # 0xBB -> LATIN CAPITAL LETTER L WITH CARON - u'\u013e' # 0xBC -> LATIN SMALL LETTER L WITH CARON - u'\u0139' # 0xBD -> LATIN CAPITAL LETTER L WITH ACUTE - u'\u013a' # 0xBE -> LATIN SMALL LETTER L WITH ACUTE - u'\u0145' # 0xBF -> LATIN CAPITAL LETTER N WITH CEDILLA - u'\u0146' # 0xC0 -> LATIN SMALL LETTER N WITH CEDILLA - u'\u0143' # 0xC1 -> LATIN CAPITAL LETTER N WITH ACUTE - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0144' # 0xC4 -> LATIN SMALL LETTER N WITH ACUTE - u'\u0147' # 0xC5 -> LATIN CAPITAL LETTER N WITH CARON - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\u0148' # 0xCB -> LATIN SMALL LETTER N WITH CARON - u'\u0150' # 0xCC -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0151' # 0xCE -> LATIN SMALL LETTER O WITH DOUBLE ACUTE - u'\u014c' # 0xCF -> LATIN CAPITAL LETTER O WITH MACRON - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\u014d' # 0xD8 -> LATIN SMALL LETTER O WITH MACRON - u'\u0154' # 0xD9 -> LATIN CAPITAL LETTER R WITH ACUTE - u'\u0155' # 0xDA -> LATIN SMALL LETTER R WITH ACUTE - u'\u0158' # 0xDB -> LATIN CAPITAL LETTER R WITH CARON - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u0159' # 0xDE -> LATIN SMALL LETTER R WITH CARON - u'\u0156' # 0xDF -> LATIN CAPITAL LETTER R WITH CEDILLA - u'\u0157' # 0xE0 -> LATIN SMALL LETTER R WITH CEDILLA - u'\u0160' # 0xE1 -> LATIN CAPITAL LETTER S WITH CARON - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u0161' # 0xE4 -> LATIN SMALL LETTER S WITH CARON - u'\u015a' # 0xE5 -> LATIN CAPITAL LETTER S WITH ACUTE - u'\u015b' # 0xE6 -> LATIN SMALL LETTER S WITH ACUTE - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\u0164' # 0xE8 -> LATIN CAPITAL LETTER T WITH CARON - u'\u0165' # 0xE9 -> LATIN SMALL LETTER T WITH CARON - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\u017d' # 0xEB -> LATIN CAPITAL LETTER Z WITH CARON - u'\u017e' # 0xEC -> LATIN SMALL LETTER Z WITH CARON - u'\u016a' # 0xED -> LATIN CAPITAL LETTER U WITH MACRON - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u016b' # 0xF0 -> LATIN SMALL LETTER U WITH MACRON - u'\u016e' # 0xF1 -> LATIN CAPITAL LETTER U WITH RING ABOVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\u016f' # 0xF3 -> LATIN SMALL LETTER U WITH RING ABOVE - u'\u0170' # 0xF4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - u'\u0171' # 0xF5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE - u'\u0172' # 0xF6 -> LATIN CAPITAL LETTER U WITH OGONEK - u'\u0173' # 0xF7 -> LATIN SMALL LETTER U WITH OGONEK - u'\xdd' # 0xF8 -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xfd' # 0xF9 -> LATIN SMALL LETTER Y WITH ACUTE - u'\u0137' # 0xFA -> LATIN SMALL LETTER K WITH CEDILLA - u'\u017b' # 0xFB -> LATIN CAPITAL LETTER Z WITH DOT ABOVE - u'\u0141' # 0xFC -> LATIN CAPITAL LETTER L WITH STROKE - u'\u017c' # 0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE - u'\u0122' # 0xFE -> LATIN CAPITAL LETTER G WITH CEDILLA - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/mac_croatian.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_croatian generated from 'MAPPINGS/VENDORS/APPLE/CROATIAN.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-croatian', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\u0160' # 0xA9 -> LATIN CAPITAL LETTER S WITH CARON - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u017d' # 0xAE -> LATIN CAPITAL LETTER Z WITH CARON - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\u2206' # 0xB4 -> INCREMENT - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u0161' # 0xB9 -> LATIN SMALL LETTER S WITH CARON - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\u017e' # 0xBE -> LATIN SMALL LETTER Z WITH CARON - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u0106' # 0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u010c' # 0xC8 -> LATIN CAPITAL LETTER C WITH CARON - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u0110' # 0xD0 -> LATIN CAPITAL LETTER D WITH STROKE - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\uf8ff' # 0xD8 -> Apple logo - u'\xa9' # 0xD9 -> COPYRIGHT SIGN - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\xc6' # 0xDE -> LATIN CAPITAL LETTER AE - u'\xbb' # 0xDF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2013' # 0xE0 -> EN DASH - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\u0107' # 0xE6 -> LATIN SMALL LETTER C WITH ACUTE - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\u010d' # 0xE8 -> LATIN SMALL LETTER C WITH CARON - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\u0111' # 0xF0 -> LATIN SMALL LETTER D WITH STROKE - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u03c0' # 0xF9 -> GREEK SMALL LETTER PI - u'\xcb' # 0xFA -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\xca' # 0xFD -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xe6' # 0xFE -> LATIN SMALL LETTER AE - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/mac_cyrillic.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_cyrillic generated from 'MAPPINGS/VENDORS/APPLE/CYRILLIC.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-cyrillic', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\u0410' # 0x80 -> CYRILLIC CAPITAL LETTER A - u'\u0411' # 0x81 -> CYRILLIC CAPITAL LETTER BE - u'\u0412' # 0x82 -> CYRILLIC CAPITAL LETTER VE - u'\u0413' # 0x83 -> CYRILLIC CAPITAL LETTER GHE - u'\u0414' # 0x84 -> CYRILLIC CAPITAL LETTER DE - u'\u0415' # 0x85 -> CYRILLIC CAPITAL LETTER IE - u'\u0416' # 0x86 -> CYRILLIC CAPITAL LETTER ZHE - u'\u0417' # 0x87 -> CYRILLIC CAPITAL LETTER ZE - u'\u0418' # 0x88 -> CYRILLIC CAPITAL LETTER I - u'\u0419' # 0x89 -> CYRILLIC CAPITAL LETTER SHORT I - u'\u041a' # 0x8A -> CYRILLIC CAPITAL LETTER KA - u'\u041b' # 0x8B -> CYRILLIC CAPITAL LETTER EL - u'\u041c' # 0x8C -> CYRILLIC CAPITAL LETTER EM - u'\u041d' # 0x8D -> CYRILLIC CAPITAL LETTER EN - u'\u041e' # 0x8E -> CYRILLIC CAPITAL LETTER O - u'\u041f' # 0x8F -> CYRILLIC CAPITAL LETTER PE - u'\u0420' # 0x90 -> CYRILLIC CAPITAL LETTER ER - u'\u0421' # 0x91 -> CYRILLIC CAPITAL LETTER ES - u'\u0422' # 0x92 -> CYRILLIC CAPITAL LETTER TE - u'\u0423' # 0x93 -> CYRILLIC CAPITAL LETTER U - u'\u0424' # 0x94 -> CYRILLIC CAPITAL LETTER EF - u'\u0425' # 0x95 -> CYRILLIC CAPITAL LETTER HA - u'\u0426' # 0x96 -> CYRILLIC CAPITAL LETTER TSE - u'\u0427' # 0x97 -> CYRILLIC CAPITAL LETTER CHE - u'\u0428' # 0x98 -> CYRILLIC CAPITAL LETTER SHA - u'\u0429' # 0x99 -> CYRILLIC CAPITAL LETTER SHCHA - u'\u042a' # 0x9A -> CYRILLIC CAPITAL LETTER HARD SIGN - u'\u042b' # 0x9B -> CYRILLIC CAPITAL LETTER YERU - u'\u042c' # 0x9C -> CYRILLIC CAPITAL LETTER SOFT SIGN - u'\u042d' # 0x9D -> CYRILLIC CAPITAL LETTER E - u'\u042e' # 0x9E -> CYRILLIC CAPITAL LETTER YU - u'\u042f' # 0x9F -> CYRILLIC CAPITAL LETTER YA - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\u0490' # 0xA2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\u0406' # 0xA7 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\u0402' # 0xAB -> CYRILLIC CAPITAL LETTER DJE - u'\u0452' # 0xAC -> CYRILLIC SMALL LETTER DJE - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u0403' # 0xAE -> CYRILLIC CAPITAL LETTER GJE - u'\u0453' # 0xAF -> CYRILLIC SMALL LETTER GJE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\u0456' # 0xB4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u0491' # 0xB6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN - u'\u0408' # 0xB7 -> CYRILLIC CAPITAL LETTER JE - u'\u0404' # 0xB8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE - u'\u0454' # 0xB9 -> CYRILLIC SMALL LETTER UKRAINIAN IE - u'\u0407' # 0xBA -> CYRILLIC CAPITAL LETTER YI - u'\u0457' # 0xBB -> CYRILLIC SMALL LETTER YI - u'\u0409' # 0xBC -> CYRILLIC CAPITAL LETTER LJE - u'\u0459' # 0xBD -> CYRILLIC SMALL LETTER LJE - u'\u040a' # 0xBE -> CYRILLIC CAPITAL LETTER NJE - u'\u045a' # 0xBF -> CYRILLIC SMALL LETTER NJE - u'\u0458' # 0xC0 -> CYRILLIC SMALL LETTER JE - u'\u0405' # 0xC1 -> CYRILLIC CAPITAL LETTER DZE - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\u040b' # 0xCB -> CYRILLIC CAPITAL LETTER TSHE - u'\u045b' # 0xCC -> CYRILLIC SMALL LETTER TSHE - u'\u040c' # 0xCD -> CYRILLIC CAPITAL LETTER KJE - u'\u045c' # 0xCE -> CYRILLIC SMALL LETTER KJE - u'\u0455' # 0xCF -> CYRILLIC SMALL LETTER DZE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u201e' # 0xD7 -> DOUBLE LOW-9 QUOTATION MARK - u'\u040e' # 0xD8 -> CYRILLIC CAPITAL LETTER SHORT U - u'\u045e' # 0xD9 -> CYRILLIC SMALL LETTER SHORT U - u'\u040f' # 0xDA -> CYRILLIC CAPITAL LETTER DZHE - u'\u045f' # 0xDB -> CYRILLIC SMALL LETTER DZHE - u'\u2116' # 0xDC -> NUMERO SIGN - u'\u0401' # 0xDD -> CYRILLIC CAPITAL LETTER IO - u'\u0451' # 0xDE -> CYRILLIC SMALL LETTER IO - u'\u044f' # 0xDF -> CYRILLIC SMALL LETTER YA - u'\u0430' # 0xE0 -> CYRILLIC SMALL LETTER A - u'\u0431' # 0xE1 -> CYRILLIC SMALL LETTER BE - u'\u0432' # 0xE2 -> CYRILLIC SMALL LETTER VE - u'\u0433' # 0xE3 -> CYRILLIC SMALL LETTER GHE - u'\u0434' # 0xE4 -> CYRILLIC SMALL LETTER DE - u'\u0435' # 0xE5 -> CYRILLIC SMALL LETTER IE - u'\u0436' # 0xE6 -> CYRILLIC SMALL LETTER ZHE - u'\u0437' # 0xE7 -> CYRILLIC SMALL LETTER ZE - u'\u0438' # 0xE8 -> CYRILLIC SMALL LETTER I - u'\u0439' # 0xE9 -> CYRILLIC SMALL LETTER SHORT I - u'\u043a' # 0xEA -> CYRILLIC SMALL LETTER KA - u'\u043b' # 0xEB -> CYRILLIC SMALL LETTER EL - u'\u043c' # 0xEC -> CYRILLIC SMALL LETTER EM - u'\u043d' # 0xED -> CYRILLIC SMALL LETTER EN - u'\u043e' # 0xEE -> CYRILLIC SMALL LETTER O - u'\u043f' # 0xEF -> CYRILLIC SMALL LETTER PE - u'\u0440' # 0xF0 -> CYRILLIC SMALL LETTER ER - u'\u0441' # 0xF1 -> CYRILLIC SMALL LETTER ES - u'\u0442' # 0xF2 -> CYRILLIC SMALL LETTER TE - u'\u0443' # 0xF3 -> CYRILLIC SMALL LETTER U - u'\u0444' # 0xF4 -> CYRILLIC SMALL LETTER EF - u'\u0445' # 0xF5 -> CYRILLIC SMALL LETTER HA - u'\u0446' # 0xF6 -> CYRILLIC SMALL LETTER TSE - u'\u0447' # 0xF7 -> CYRILLIC SMALL LETTER CHE - u'\u0448' # 0xF8 -> CYRILLIC SMALL LETTER SHA - u'\u0449' # 0xF9 -> CYRILLIC SMALL LETTER SHCHA - u'\u044a' # 0xFA -> CYRILLIC SMALL LETTER HARD SIGN - u'\u044b' # 0xFB -> CYRILLIC SMALL LETTER YERU - u'\u044c' # 0xFC -> CYRILLIC SMALL LETTER SOFT SIGN - u'\u044d' # 0xFD -> CYRILLIC SMALL LETTER E - u'\u044e' # 0xFE -> CYRILLIC SMALL LETTER YU - u'\u20ac' # 0xFF -> EURO SIGN -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/mac_farsi.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_farsi generated from 'MAPPINGS/VENDORS/APPLE/FARSI.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-farsi', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE, left-right - u'!' # 0x21 -> EXCLAMATION MARK, left-right - u'"' # 0x22 -> QUOTATION MARK, left-right - u'#' # 0x23 -> NUMBER SIGN, left-right - u'$' # 0x24 -> DOLLAR SIGN, left-right - u'%' # 0x25 -> PERCENT SIGN, left-right - u'&' # 0x26 -> AMPERSAND, left-right - u"'" # 0x27 -> APOSTROPHE, left-right - u'(' # 0x28 -> LEFT PARENTHESIS, left-right - u')' # 0x29 -> RIGHT PARENTHESIS, left-right - u'*' # 0x2A -> ASTERISK, left-right - u'+' # 0x2B -> PLUS SIGN, left-right - u',' # 0x2C -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR - u'-' # 0x2D -> HYPHEN-MINUS, left-right - u'.' # 0x2E -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR - u'/' # 0x2F -> SOLIDUS, left-right - u'0' # 0x30 -> DIGIT ZERO; in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE; in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE - u'2' # 0x32 -> DIGIT TWO; in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO - u'3' # 0x33 -> DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR; in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE; in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX; in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE; in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE - u':' # 0x3A -> COLON, left-right - u';' # 0x3B -> SEMICOLON, left-right - u'<' # 0x3C -> LESS-THAN SIGN, left-right - u'=' # 0x3D -> EQUALS SIGN, left-right - u'>' # 0x3E -> GREATER-THAN SIGN, left-right - u'?' # 0x3F -> QUESTION MARK, left-right - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET, left-right - u'\\' # 0x5C -> REVERSE SOLIDUS, left-right - u']' # 0x5D -> RIGHT SQUARE BRACKET, left-right - u'^' # 0x5E -> CIRCUMFLEX ACCENT, left-right - u'_' # 0x5F -> LOW LINE, left-right - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET, left-right - u'|' # 0x7C -> VERTICAL LINE, left-right - u'}' # 0x7D -> RIGHT CURLY BRACKET, left-right - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xa0' # 0x81 -> NO-BREAK SPACE, right-left - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u06ba' # 0x8B -> ARABIC LETTER NOON GHUNNA - u'\xab' # 0x8C -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\u2026' # 0x93 -> HORIZONTAL ELLIPSIS, right-left - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xbb' # 0x98 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf7' # 0x9B -> DIVISION SIGN, right-left - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u' ' # 0xA0 -> SPACE, right-left - u'!' # 0xA1 -> EXCLAMATION MARK, right-left - u'"' # 0xA2 -> QUOTATION MARK, right-left - u'#' # 0xA3 -> NUMBER SIGN, right-left - u'$' # 0xA4 -> DOLLAR SIGN, right-left - u'\u066a' # 0xA5 -> ARABIC PERCENT SIGN - u'&' # 0xA6 -> AMPERSAND, right-left - u"'" # 0xA7 -> APOSTROPHE, right-left - u'(' # 0xA8 -> LEFT PARENTHESIS, right-left - u')' # 0xA9 -> RIGHT PARENTHESIS, right-left - u'*' # 0xAA -> ASTERISK, right-left - u'+' # 0xAB -> PLUS SIGN, right-left - u'\u060c' # 0xAC -> ARABIC COMMA - u'-' # 0xAD -> HYPHEN-MINUS, right-left - u'.' # 0xAE -> FULL STOP, right-left - u'/' # 0xAF -> SOLIDUS, right-left - u'\u06f0' # 0xB0 -> EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override) - u'\u06f1' # 0xB1 -> EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override) - u'\u06f2' # 0xB2 -> EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override) - u'\u06f3' # 0xB3 -> EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override) - u'\u06f4' # 0xB4 -> EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override) - u'\u06f5' # 0xB5 -> EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override) - u'\u06f6' # 0xB6 -> EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override) - u'\u06f7' # 0xB7 -> EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override) - u'\u06f8' # 0xB8 -> EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override) - u'\u06f9' # 0xB9 -> EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override) - u':' # 0xBA -> COLON, right-left - u'\u061b' # 0xBB -> ARABIC SEMICOLON - u'<' # 0xBC -> LESS-THAN SIGN, right-left - u'=' # 0xBD -> EQUALS SIGN, right-left - u'>' # 0xBE -> GREATER-THAN SIGN, right-left - u'\u061f' # 0xBF -> ARABIC QUESTION MARK - u'\u274a' # 0xC0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left - u'\u0621' # 0xC1 -> ARABIC LETTER HAMZA - u'\u0622' # 0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE - u'\u0623' # 0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE - u'\u0624' # 0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE - u'\u0625' # 0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW - u'\u0626' # 0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE - u'\u0627' # 0xC7 -> ARABIC LETTER ALEF - u'\u0628' # 0xC8 -> ARABIC LETTER BEH - u'\u0629' # 0xC9 -> ARABIC LETTER TEH MARBUTA - u'\u062a' # 0xCA -> ARABIC LETTER TEH - u'\u062b' # 0xCB -> ARABIC LETTER THEH - u'\u062c' # 0xCC -> ARABIC LETTER JEEM - u'\u062d' # 0xCD -> ARABIC LETTER HAH - u'\u062e' # 0xCE -> ARABIC LETTER KHAH - u'\u062f' # 0xCF -> ARABIC LETTER DAL - u'\u0630' # 0xD0 -> ARABIC LETTER THAL - u'\u0631' # 0xD1 -> ARABIC LETTER REH - u'\u0632' # 0xD2 -> ARABIC LETTER ZAIN - u'\u0633' # 0xD3 -> ARABIC LETTER SEEN - u'\u0634' # 0xD4 -> ARABIC LETTER SHEEN - u'\u0635' # 0xD5 -> ARABIC LETTER SAD - u'\u0636' # 0xD6 -> ARABIC LETTER DAD - u'\u0637' # 0xD7 -> ARABIC LETTER TAH - u'\u0638' # 0xD8 -> ARABIC LETTER ZAH - u'\u0639' # 0xD9 -> ARABIC LETTER AIN - u'\u063a' # 0xDA -> ARABIC LETTER GHAIN - u'[' # 0xDB -> LEFT SQUARE BRACKET, right-left - u'\\' # 0xDC -> REVERSE SOLIDUS, right-left - u']' # 0xDD -> RIGHT SQUARE BRACKET, right-left - u'^' # 0xDE -> CIRCUMFLEX ACCENT, right-left - u'_' # 0xDF -> LOW LINE, right-left - u'\u0640' # 0xE0 -> ARABIC TATWEEL - u'\u0641' # 0xE1 -> ARABIC LETTER FEH - u'\u0642' # 0xE2 -> ARABIC LETTER QAF - u'\u0643' # 0xE3 -> ARABIC LETTER KAF - u'\u0644' # 0xE4 -> ARABIC LETTER LAM - u'\u0645' # 0xE5 -> ARABIC LETTER MEEM - u'\u0646' # 0xE6 -> ARABIC LETTER NOON - u'\u0647' # 0xE7 -> ARABIC LETTER HEH - u'\u0648' # 0xE8 -> ARABIC LETTER WAW - u'\u0649' # 0xE9 -> ARABIC LETTER ALEF MAKSURA - u'\u064a' # 0xEA -> ARABIC LETTER YEH - u'\u064b' # 0xEB -> ARABIC FATHATAN - u'\u064c' # 0xEC -> ARABIC DAMMATAN - u'\u064d' # 0xED -> ARABIC KASRATAN - u'\u064e' # 0xEE -> ARABIC FATHA - u'\u064f' # 0xEF -> ARABIC DAMMA - u'\u0650' # 0xF0 -> ARABIC KASRA - u'\u0651' # 0xF1 -> ARABIC SHADDA - u'\u0652' # 0xF2 -> ARABIC SUKUN - u'\u067e' # 0xF3 -> ARABIC LETTER PEH - u'\u0679' # 0xF4 -> ARABIC LETTER TTEH - u'\u0686' # 0xF5 -> ARABIC LETTER TCHEH - u'\u06d5' # 0xF6 -> ARABIC LETTER AE - u'\u06a4' # 0xF7 -> ARABIC LETTER VEH - u'\u06af' # 0xF8 -> ARABIC LETTER GAF - u'\u0688' # 0xF9 -> ARABIC LETTER DDAL - u'\u0691' # 0xFA -> ARABIC LETTER RREH - u'{' # 0xFB -> LEFT CURLY BRACKET, right-left - u'|' # 0xFC -> VERTICAL LINE, right-left - u'}' # 0xFD -> RIGHT CURLY BRACKET, right-left - u'\u0698' # 0xFE -> ARABIC LETTER JEH - u'\u06d2' # 0xFF -> ARABIC LETTER YEH BARREE -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/mac_greek.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_greek generated from 'MAPPINGS/VENDORS/APPLE/GREEK.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-greek', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xb9' # 0x81 -> SUPERSCRIPT ONE - u'\xb2' # 0x82 -> SUPERSCRIPT TWO - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xb3' # 0x84 -> SUPERSCRIPT THREE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\u0385' # 0x87 -> GREEK DIALYTIKA TONOS - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\u0384' # 0x8B -> GREEK TONOS - u'\xa8' # 0x8C -> DIAERESIS - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xa3' # 0x92 -> POUND SIGN - u'\u2122' # 0x93 -> TRADE MARK SIGN - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\u2022' # 0x96 -> BULLET - u'\xbd' # 0x97 -> VULGAR FRACTION ONE HALF - u'\u2030' # 0x98 -> PER MILLE SIGN - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xa6' # 0x9B -> BROKEN BAR - u'\u20ac' # 0x9C -> EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\u0393' # 0xA1 -> GREEK CAPITAL LETTER GAMMA - u'\u0394' # 0xA2 -> GREEK CAPITAL LETTER DELTA - u'\u0398' # 0xA3 -> GREEK CAPITAL LETTER THETA - u'\u039b' # 0xA4 -> GREEK CAPITAL LETTER LAMDA - u'\u039e' # 0xA5 -> GREEK CAPITAL LETTER XI - u'\u03a0' # 0xA6 -> GREEK CAPITAL LETTER PI - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u03a3' # 0xAA -> GREEK CAPITAL LETTER SIGMA - u'\u03aa' # 0xAB -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA - u'\xa7' # 0xAC -> SECTION SIGN - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xb0' # 0xAE -> DEGREE SIGN - u'\xb7' # 0xAF -> MIDDLE DOT - u'\u0391' # 0xB0 -> GREEK CAPITAL LETTER ALPHA - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\u0392' # 0xB5 -> GREEK CAPITAL LETTER BETA - u'\u0395' # 0xB6 -> GREEK CAPITAL LETTER EPSILON - u'\u0396' # 0xB7 -> GREEK CAPITAL LETTER ZETA - u'\u0397' # 0xB8 -> GREEK CAPITAL LETTER ETA - u'\u0399' # 0xB9 -> GREEK CAPITAL LETTER IOTA - u'\u039a' # 0xBA -> GREEK CAPITAL LETTER KAPPA - u'\u039c' # 0xBB -> GREEK CAPITAL LETTER MU - u'\u03a6' # 0xBC -> GREEK CAPITAL LETTER PHI - u'\u03ab' # 0xBD -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA - u'\u03a8' # 0xBE -> GREEK CAPITAL LETTER PSI - u'\u03a9' # 0xBF -> GREEK CAPITAL LETTER OMEGA - u'\u03ac' # 0xC0 -> GREEK SMALL LETTER ALPHA WITH TONOS - u'\u039d' # 0xC1 -> GREEK CAPITAL LETTER NU - u'\xac' # 0xC2 -> NOT SIGN - u'\u039f' # 0xC3 -> GREEK CAPITAL LETTER OMICRON - u'\u03a1' # 0xC4 -> GREEK CAPITAL LETTER RHO - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u03a4' # 0xC6 -> GREEK CAPITAL LETTER TAU - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\u03a5' # 0xCB -> GREEK CAPITAL LETTER UPSILON - u'\u03a7' # 0xCC -> GREEK CAPITAL LETTER CHI - u'\u0386' # 0xCD -> GREEK CAPITAL LETTER ALPHA WITH TONOS - u'\u0388' # 0xCE -> GREEK CAPITAL LETTER EPSILON WITH TONOS - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2015' # 0xD1 -> HORIZONTAL BAR - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u0389' # 0xD7 -> GREEK CAPITAL LETTER ETA WITH TONOS - u'\u038a' # 0xD8 -> GREEK CAPITAL LETTER IOTA WITH TONOS - u'\u038c' # 0xD9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS - u'\u038e' # 0xDA -> GREEK CAPITAL LETTER UPSILON WITH TONOS - u'\u03ad' # 0xDB -> GREEK SMALL LETTER EPSILON WITH TONOS - u'\u03ae' # 0xDC -> GREEK SMALL LETTER ETA WITH TONOS - u'\u03af' # 0xDD -> GREEK SMALL LETTER IOTA WITH TONOS - u'\u03cc' # 0xDE -> GREEK SMALL LETTER OMICRON WITH TONOS - u'\u038f' # 0xDF -> GREEK CAPITAL LETTER OMEGA WITH TONOS - u'\u03cd' # 0xE0 -> GREEK SMALL LETTER UPSILON WITH TONOS - u'\u03b1' # 0xE1 -> GREEK SMALL LETTER ALPHA - u'\u03b2' # 0xE2 -> GREEK SMALL LETTER BETA - u'\u03c8' # 0xE3 -> GREEK SMALL LETTER PSI - u'\u03b4' # 0xE4 -> GREEK SMALL LETTER DELTA - u'\u03b5' # 0xE5 -> GREEK SMALL LETTER EPSILON - u'\u03c6' # 0xE6 -> GREEK SMALL LETTER PHI - u'\u03b3' # 0xE7 -> GREEK SMALL LETTER GAMMA - u'\u03b7' # 0xE8 -> GREEK SMALL LETTER ETA - u'\u03b9' # 0xE9 -> GREEK SMALL LETTER IOTA - u'\u03be' # 0xEA -> GREEK SMALL LETTER XI - u'\u03ba' # 0xEB -> GREEK SMALL LETTER KAPPA - u'\u03bb' # 0xEC -> GREEK SMALL LETTER LAMDA - u'\u03bc' # 0xED -> GREEK SMALL LETTER MU - u'\u03bd' # 0xEE -> GREEK SMALL LETTER NU - u'\u03bf' # 0xEF -> GREEK SMALL LETTER OMICRON - u'\u03c0' # 0xF0 -> GREEK SMALL LETTER PI - u'\u03ce' # 0xF1 -> GREEK SMALL LETTER OMEGA WITH TONOS - u'\u03c1' # 0xF2 -> GREEK SMALL LETTER RHO - u'\u03c3' # 0xF3 -> GREEK SMALL LETTER SIGMA - u'\u03c4' # 0xF4 -> GREEK SMALL LETTER TAU - u'\u03b8' # 0xF5 -> GREEK SMALL LETTER THETA - u'\u03c9' # 0xF6 -> GREEK SMALL LETTER OMEGA - u'\u03c2' # 0xF7 -> GREEK SMALL LETTER FINAL SIGMA - u'\u03c7' # 0xF8 -> GREEK SMALL LETTER CHI - u'\u03c5' # 0xF9 -> GREEK SMALL LETTER UPSILON - u'\u03b6' # 0xFA -> GREEK SMALL LETTER ZETA - u'\u03ca' # 0xFB -> GREEK SMALL LETTER IOTA WITH DIALYTIKA - u'\u03cb' # 0xFC -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA - u'\u0390' # 0xFD -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS - u'\u03b0' # 0xFE -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS - u'\xad' # 0xFF -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/mac_iceland.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_iceland generated from 'MAPPINGS/VENDORS/APPLE/ICELAND.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-iceland', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\xdd' # 0xA0 -> LATIN CAPITAL LETTER Y WITH ACUTE - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xBE -> LATIN SMALL LETTER AE - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\xd0' # 0xDC -> LATIN CAPITAL LETTER ETH - u'\xf0' # 0xDD -> LATIN SMALL LETTER ETH - u'\xde' # 0xDE -> LATIN CAPITAL LETTER THORN - u'\xfe' # 0xDF -> LATIN SMALL LETTER THORN - u'\xfd' # 0xE0 -> LATIN SMALL LETTER Y WITH ACUTE - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/mac_latin2.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,183 +0,0 @@ -""" Python Character Mapping Codec generated from 'LATIN2.TXT' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_map)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-latin2', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x00c4, # LATIN CAPITAL LETTER A WITH DIAERESIS - 0x0081: 0x0100, # LATIN CAPITAL LETTER A WITH MACRON - 0x0082: 0x0101, # LATIN SMALL LETTER A WITH MACRON - 0x0083: 0x00c9, # LATIN CAPITAL LETTER E WITH ACUTE - 0x0084: 0x0104, # LATIN CAPITAL LETTER A WITH OGONEK - 0x0085: 0x00d6, # LATIN CAPITAL LETTER O WITH DIAERESIS - 0x0086: 0x00dc, # LATIN CAPITAL LETTER U WITH DIAERESIS - 0x0087: 0x00e1, # LATIN SMALL LETTER A WITH ACUTE - 0x0088: 0x0105, # LATIN SMALL LETTER A WITH OGONEK - 0x0089: 0x010c, # LATIN CAPITAL LETTER C WITH CARON - 0x008a: 0x00e4, # LATIN SMALL LETTER A WITH DIAERESIS - 0x008b: 0x010d, # LATIN SMALL LETTER C WITH CARON - 0x008c: 0x0106, # LATIN CAPITAL LETTER C WITH ACUTE - 0x008d: 0x0107, # LATIN SMALL LETTER C WITH ACUTE - 0x008e: 0x00e9, # LATIN SMALL LETTER E WITH ACUTE - 0x008f: 0x0179, # LATIN CAPITAL LETTER Z WITH ACUTE - 0x0090: 0x017a, # LATIN SMALL LETTER Z WITH ACUTE - 0x0091: 0x010e, # LATIN CAPITAL LETTER D WITH CARON - 0x0092: 0x00ed, # LATIN SMALL LETTER I WITH ACUTE - 0x0093: 0x010f, # LATIN SMALL LETTER D WITH CARON - 0x0094: 0x0112, # LATIN CAPITAL LETTER E WITH MACRON - 0x0095: 0x0113, # LATIN SMALL LETTER E WITH MACRON - 0x0096: 0x0116, # LATIN CAPITAL LETTER E WITH DOT ABOVE - 0x0097: 0x00f3, # LATIN SMALL LETTER O WITH ACUTE - 0x0098: 0x0117, # LATIN SMALL LETTER E WITH DOT ABOVE - 0x0099: 0x00f4, # LATIN SMALL LETTER O WITH CIRCUMFLEX - 0x009a: 0x00f6, # LATIN SMALL LETTER O WITH DIAERESIS - 0x009b: 0x00f5, # LATIN SMALL LETTER O WITH TILDE - 0x009c: 0x00fa, # LATIN SMALL LETTER U WITH ACUTE - 0x009d: 0x011a, # LATIN CAPITAL LETTER E WITH CARON - 0x009e: 0x011b, # LATIN SMALL LETTER E WITH CARON - 0x009f: 0x00fc, # LATIN SMALL LETTER U WITH DIAERESIS - 0x00a0: 0x2020, # DAGGER - 0x00a1: 0x00b0, # DEGREE SIGN - 0x00a2: 0x0118, # LATIN CAPITAL LETTER E WITH OGONEK - 0x00a4: 0x00a7, # SECTION SIGN - 0x00a5: 0x2022, # BULLET - 0x00a6: 0x00b6, # PILCROW SIGN - 0x00a7: 0x00df, # LATIN SMALL LETTER SHARP S - 0x00a8: 0x00ae, # REGISTERED SIGN - 0x00aa: 0x2122, # TRADE MARK SIGN - 0x00ab: 0x0119, # LATIN SMALL LETTER E WITH OGONEK - 0x00ac: 0x00a8, # DIAERESIS - 0x00ad: 0x2260, # NOT EQUAL TO - 0x00ae: 0x0123, # LATIN SMALL LETTER G WITH CEDILLA - 0x00af: 0x012e, # LATIN CAPITAL LETTER I WITH OGONEK - 0x00b0: 0x012f, # LATIN SMALL LETTER I WITH OGONEK - 0x00b1: 0x012a, # LATIN CAPITAL LETTER I WITH MACRON - 0x00b2: 0x2264, # LESS-THAN OR EQUAL TO - 0x00b3: 0x2265, # GREATER-THAN OR EQUAL TO - 0x00b4: 0x012b, # LATIN SMALL LETTER I WITH MACRON - 0x00b5: 0x0136, # LATIN CAPITAL LETTER K WITH CEDILLA - 0x00b6: 0x2202, # PARTIAL DIFFERENTIAL - 0x00b7: 0x2211, # N-ARY SUMMATION - 0x00b8: 0x0142, # LATIN SMALL LETTER L WITH STROKE - 0x00b9: 0x013b, # LATIN CAPITAL LETTER L WITH CEDILLA - 0x00ba: 0x013c, # LATIN SMALL LETTER L WITH CEDILLA - 0x00bb: 0x013d, # LATIN CAPITAL LETTER L WITH CARON - 0x00bc: 0x013e, # LATIN SMALL LETTER L WITH CARON - 0x00bd: 0x0139, # LATIN CAPITAL LETTER L WITH ACUTE - 0x00be: 0x013a, # LATIN SMALL LETTER L WITH ACUTE - 0x00bf: 0x0145, # LATIN CAPITAL LETTER N WITH CEDILLA - 0x00c0: 0x0146, # LATIN SMALL LETTER N WITH CEDILLA - 0x00c1: 0x0143, # LATIN CAPITAL LETTER N WITH ACUTE - 0x00c2: 0x00ac, # NOT SIGN - 0x00c3: 0x221a, # SQUARE ROOT - 0x00c4: 0x0144, # LATIN SMALL LETTER N WITH ACUTE - 0x00c5: 0x0147, # LATIN CAPITAL LETTER N WITH CARON - 0x00c6: 0x2206, # INCREMENT - 0x00c7: 0x00ab, # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c8: 0x00bb, # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - 0x00c9: 0x2026, # HORIZONTAL ELLIPSIS - 0x00ca: 0x00a0, # NO-BREAK SPACE - 0x00cb: 0x0148, # LATIN SMALL LETTER N WITH CARON - 0x00cc: 0x0150, # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE - 0x00cd: 0x00d5, # LATIN CAPITAL LETTER O WITH TILDE - 0x00ce: 0x0151, # LATIN SMALL LETTER O WITH DOUBLE ACUTE - 0x00cf: 0x014c, # LATIN CAPITAL LETTER O WITH MACRON - 0x00d0: 0x2013, # EN DASH - 0x00d1: 0x2014, # EM DASH - 0x00d2: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x00d3: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x00d4: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x00d5: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x00d6: 0x00f7, # DIVISION SIGN - 0x00d7: 0x25ca, # LOZENGE - 0x00d8: 0x014d, # LATIN SMALL LETTER O WITH MACRON - 0x00d9: 0x0154, # LATIN CAPITAL LETTER R WITH ACUTE - 0x00da: 0x0155, # LATIN SMALL LETTER R WITH ACUTE - 0x00db: 0x0158, # LATIN CAPITAL LETTER R WITH CARON - 0x00dc: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x00dd: 0x203a, # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x00de: 0x0159, # LATIN SMALL LETTER R WITH CARON - 0x00df: 0x0156, # LATIN CAPITAL LETTER R WITH CEDILLA - 0x00e0: 0x0157, # LATIN SMALL LETTER R WITH CEDILLA - 0x00e1: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x00e2: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x00e3: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x00e4: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x00e5: 0x015a, # LATIN CAPITAL LETTER S WITH ACUTE - 0x00e6: 0x015b, # LATIN SMALL LETTER S WITH ACUTE - 0x00e7: 0x00c1, # LATIN CAPITAL LETTER A WITH ACUTE - 0x00e8: 0x0164, # LATIN CAPITAL LETTER T WITH CARON - 0x00e9: 0x0165, # LATIN SMALL LETTER T WITH CARON - 0x00ea: 0x00cd, # LATIN CAPITAL LETTER I WITH ACUTE - 0x00eb: 0x017d, # LATIN CAPITAL LETTER Z WITH CARON - 0x00ec: 0x017e, # LATIN SMALL LETTER Z WITH CARON - 0x00ed: 0x016a, # LATIN CAPITAL LETTER U WITH MACRON - 0x00ee: 0x00d3, # LATIN CAPITAL LETTER O WITH ACUTE - 0x00ef: 0x00d4, # LATIN CAPITAL LETTER O WITH CIRCUMFLEX - 0x00f0: 0x016b, # LATIN SMALL LETTER U WITH MACRON - 0x00f1: 0x016e, # LATIN CAPITAL LETTER U WITH RING ABOVE - 0x00f2: 0x00da, # LATIN CAPITAL LETTER U WITH ACUTE - 0x00f3: 0x016f, # LATIN SMALL LETTER U WITH RING ABOVE - 0x00f4: 0x0170, # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE - 0x00f5: 0x0171, # LATIN SMALL LETTER U WITH DOUBLE ACUTE - 0x00f6: 0x0172, # LATIN CAPITAL LETTER U WITH OGONEK - 0x00f7: 0x0173, # LATIN SMALL LETTER U WITH OGONEK - 0x00f8: 0x00dd, # LATIN CAPITAL LETTER Y WITH ACUTE - 0x00f9: 0x00fd, # LATIN SMALL LETTER Y WITH ACUTE - 0x00fa: 0x0137, # LATIN SMALL LETTER K WITH CEDILLA - 0x00fb: 0x017b, # LATIN CAPITAL LETTER Z WITH DOT ABOVE - 0x00fc: 0x0141, # LATIN CAPITAL LETTER L WITH STROKE - 0x00fd: 0x017c, # LATIN SMALL LETTER Z WITH DOT ABOVE - 0x00fe: 0x0122, # LATIN CAPITAL LETTER G WITH CEDILLA - 0x00ff: 0x02c7, # CARON -}) - -### Encoding Map - -encoding_map = codecs.make_encoding_map(decoding_map)
--- a/test/lib/python2.7/encodings/mac_roman.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_roman generated from 'MAPPINGS/VENDORS/APPLE/ROMAN.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-roman', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xBE -> LATIN SMALL LETTER AE - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\ufb01' # 0xDE -> LATIN SMALL LIGATURE FI - u'\ufb02' # 0xDF -> LATIN SMALL LIGATURE FL - u'\u2021' # 0xE0 -> DOUBLE DAGGER - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/mac_romanian.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_romanian generated from 'MAPPINGS/VENDORS/APPLE/ROMANIAN.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-romanian', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\u0102' # 0xAE -> LATIN CAPITAL LETTER A WITH BREVE - u'\u0218' # 0xAF -> LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\u0103' # 0xBE -> LATIN SMALL LETTER A WITH BREVE - u'\u0219' # 0xBF -> LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u2044' # 0xDA -> FRACTION SLASH - u'\u20ac' # 0xDB -> EURO SIGN - u'\u2039' # 0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK - u'\u203a' # 0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - u'\u021a' # 0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - u'\u021b' # 0xDF -> LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later - u'\u2021' # 0xE0 -> DOUBLE DAGGER - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\u0131' # 0xF5 -> LATIN SMALL LETTER DOTLESS I - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/mac_turkish.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec mac_turkish generated from 'MAPPINGS/VENDORS/APPLE/TURKISH.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mac-turkish', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> CONTROL CHARACTER - u'\x01' # 0x01 -> CONTROL CHARACTER - u'\x02' # 0x02 -> CONTROL CHARACTER - u'\x03' # 0x03 -> CONTROL CHARACTER - u'\x04' # 0x04 -> CONTROL CHARACTER - u'\x05' # 0x05 -> CONTROL CHARACTER - u'\x06' # 0x06 -> CONTROL CHARACTER - u'\x07' # 0x07 -> CONTROL CHARACTER - u'\x08' # 0x08 -> CONTROL CHARACTER - u'\t' # 0x09 -> CONTROL CHARACTER - u'\n' # 0x0A -> CONTROL CHARACTER - u'\x0b' # 0x0B -> CONTROL CHARACTER - u'\x0c' # 0x0C -> CONTROL CHARACTER - u'\r' # 0x0D -> CONTROL CHARACTER - u'\x0e' # 0x0E -> CONTROL CHARACTER - u'\x0f' # 0x0F -> CONTROL CHARACTER - u'\x10' # 0x10 -> CONTROL CHARACTER - u'\x11' # 0x11 -> CONTROL CHARACTER - u'\x12' # 0x12 -> CONTROL CHARACTER - u'\x13' # 0x13 -> CONTROL CHARACTER - u'\x14' # 0x14 -> CONTROL CHARACTER - u'\x15' # 0x15 -> CONTROL CHARACTER - u'\x16' # 0x16 -> CONTROL CHARACTER - u'\x17' # 0x17 -> CONTROL CHARACTER - u'\x18' # 0x18 -> CONTROL CHARACTER - u'\x19' # 0x19 -> CONTROL CHARACTER - u'\x1a' # 0x1A -> CONTROL CHARACTER - u'\x1b' # 0x1B -> CONTROL CHARACTER - u'\x1c' # 0x1C -> CONTROL CHARACTER - u'\x1d' # 0x1D -> CONTROL CHARACTER - u'\x1e' # 0x1E -> CONTROL CHARACTER - u'\x1f' # 0x1F -> CONTROL CHARACTER - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> CONTROL CHARACTER - u'\xc4' # 0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS - u'\xc5' # 0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE - u'\xc7' # 0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA - u'\xc9' # 0x83 -> LATIN CAPITAL LETTER E WITH ACUTE - u'\xd1' # 0x84 -> LATIN CAPITAL LETTER N WITH TILDE - u'\xd6' # 0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS - u'\xdc' # 0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS - u'\xe1' # 0x87 -> LATIN SMALL LETTER A WITH ACUTE - u'\xe0' # 0x88 -> LATIN SMALL LETTER A WITH GRAVE - u'\xe2' # 0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX - u'\xe4' # 0x8A -> LATIN SMALL LETTER A WITH DIAERESIS - u'\xe3' # 0x8B -> LATIN SMALL LETTER A WITH TILDE - u'\xe5' # 0x8C -> LATIN SMALL LETTER A WITH RING ABOVE - u'\xe7' # 0x8D -> LATIN SMALL LETTER C WITH CEDILLA - u'\xe9' # 0x8E -> LATIN SMALL LETTER E WITH ACUTE - u'\xe8' # 0x8F -> LATIN SMALL LETTER E WITH GRAVE - u'\xea' # 0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX - u'\xeb' # 0x91 -> LATIN SMALL LETTER E WITH DIAERESIS - u'\xed' # 0x92 -> LATIN SMALL LETTER I WITH ACUTE - u'\xec' # 0x93 -> LATIN SMALL LETTER I WITH GRAVE - u'\xee' # 0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX - u'\xef' # 0x95 -> LATIN SMALL LETTER I WITH DIAERESIS - u'\xf1' # 0x96 -> LATIN SMALL LETTER N WITH TILDE - u'\xf3' # 0x97 -> LATIN SMALL LETTER O WITH ACUTE - u'\xf2' # 0x98 -> LATIN SMALL LETTER O WITH GRAVE - u'\xf4' # 0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX - u'\xf6' # 0x9A -> LATIN SMALL LETTER O WITH DIAERESIS - u'\xf5' # 0x9B -> LATIN SMALL LETTER O WITH TILDE - u'\xfa' # 0x9C -> LATIN SMALL LETTER U WITH ACUTE - u'\xf9' # 0x9D -> LATIN SMALL LETTER U WITH GRAVE - u'\xfb' # 0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX - u'\xfc' # 0x9F -> LATIN SMALL LETTER U WITH DIAERESIS - u'\u2020' # 0xA0 -> DAGGER - u'\xb0' # 0xA1 -> DEGREE SIGN - u'\xa2' # 0xA2 -> CENT SIGN - u'\xa3' # 0xA3 -> POUND SIGN - u'\xa7' # 0xA4 -> SECTION SIGN - u'\u2022' # 0xA5 -> BULLET - u'\xb6' # 0xA6 -> PILCROW SIGN - u'\xdf' # 0xA7 -> LATIN SMALL LETTER SHARP S - u'\xae' # 0xA8 -> REGISTERED SIGN - u'\xa9' # 0xA9 -> COPYRIGHT SIGN - u'\u2122' # 0xAA -> TRADE MARK SIGN - u'\xb4' # 0xAB -> ACUTE ACCENT - u'\xa8' # 0xAC -> DIAERESIS - u'\u2260' # 0xAD -> NOT EQUAL TO - u'\xc6' # 0xAE -> LATIN CAPITAL LETTER AE - u'\xd8' # 0xAF -> LATIN CAPITAL LETTER O WITH STROKE - u'\u221e' # 0xB0 -> INFINITY - u'\xb1' # 0xB1 -> PLUS-MINUS SIGN - u'\u2264' # 0xB2 -> LESS-THAN OR EQUAL TO - u'\u2265' # 0xB3 -> GREATER-THAN OR EQUAL TO - u'\xa5' # 0xB4 -> YEN SIGN - u'\xb5' # 0xB5 -> MICRO SIGN - u'\u2202' # 0xB6 -> PARTIAL DIFFERENTIAL - u'\u2211' # 0xB7 -> N-ARY SUMMATION - u'\u220f' # 0xB8 -> N-ARY PRODUCT - u'\u03c0' # 0xB9 -> GREEK SMALL LETTER PI - u'\u222b' # 0xBA -> INTEGRAL - u'\xaa' # 0xBB -> FEMININE ORDINAL INDICATOR - u'\xba' # 0xBC -> MASCULINE ORDINAL INDICATOR - u'\u03a9' # 0xBD -> GREEK CAPITAL LETTER OMEGA - u'\xe6' # 0xBE -> LATIN SMALL LETTER AE - u'\xf8' # 0xBF -> LATIN SMALL LETTER O WITH STROKE - u'\xbf' # 0xC0 -> INVERTED QUESTION MARK - u'\xa1' # 0xC1 -> INVERTED EXCLAMATION MARK - u'\xac' # 0xC2 -> NOT SIGN - u'\u221a' # 0xC3 -> SQUARE ROOT - u'\u0192' # 0xC4 -> LATIN SMALL LETTER F WITH HOOK - u'\u2248' # 0xC5 -> ALMOST EQUAL TO - u'\u2206' # 0xC6 -> INCREMENT - u'\xab' # 0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\xbb' # 0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK - u'\u2026' # 0xC9 -> HORIZONTAL ELLIPSIS - u'\xa0' # 0xCA -> NO-BREAK SPACE - u'\xc0' # 0xCB -> LATIN CAPITAL LETTER A WITH GRAVE - u'\xc3' # 0xCC -> LATIN CAPITAL LETTER A WITH TILDE - u'\xd5' # 0xCD -> LATIN CAPITAL LETTER O WITH TILDE - u'\u0152' # 0xCE -> LATIN CAPITAL LIGATURE OE - u'\u0153' # 0xCF -> LATIN SMALL LIGATURE OE - u'\u2013' # 0xD0 -> EN DASH - u'\u2014' # 0xD1 -> EM DASH - u'\u201c' # 0xD2 -> LEFT DOUBLE QUOTATION MARK - u'\u201d' # 0xD3 -> RIGHT DOUBLE QUOTATION MARK - u'\u2018' # 0xD4 -> LEFT SINGLE QUOTATION MARK - u'\u2019' # 0xD5 -> RIGHT SINGLE QUOTATION MARK - u'\xf7' # 0xD6 -> DIVISION SIGN - u'\u25ca' # 0xD7 -> LOZENGE - u'\xff' # 0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS - u'\u0178' # 0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS - u'\u011e' # 0xDA -> LATIN CAPITAL LETTER G WITH BREVE - u'\u011f' # 0xDB -> LATIN SMALL LETTER G WITH BREVE - u'\u0130' # 0xDC -> LATIN CAPITAL LETTER I WITH DOT ABOVE - u'\u0131' # 0xDD -> LATIN SMALL LETTER DOTLESS I - u'\u015e' # 0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA - u'\u015f' # 0xDF -> LATIN SMALL LETTER S WITH CEDILLA - u'\u2021' # 0xE0 -> DOUBLE DAGGER - u'\xb7' # 0xE1 -> MIDDLE DOT - u'\u201a' # 0xE2 -> SINGLE LOW-9 QUOTATION MARK - u'\u201e' # 0xE3 -> DOUBLE LOW-9 QUOTATION MARK - u'\u2030' # 0xE4 -> PER MILLE SIGN - u'\xc2' # 0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX - u'\xca' # 0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX - u'\xc1' # 0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE - u'\xcb' # 0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS - u'\xc8' # 0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE - u'\xcd' # 0xEA -> LATIN CAPITAL LETTER I WITH ACUTE - u'\xce' # 0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX - u'\xcf' # 0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS - u'\xcc' # 0xED -> LATIN CAPITAL LETTER I WITH GRAVE - u'\xd3' # 0xEE -> LATIN CAPITAL LETTER O WITH ACUTE - u'\xd4' # 0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX - u'\uf8ff' # 0xF0 -> Apple logo - u'\xd2' # 0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE - u'\xda' # 0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE - u'\xdb' # 0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX - u'\xd9' # 0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE - u'\uf8a0' # 0xF5 -> undefined1 - u'\u02c6' # 0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT - u'\u02dc' # 0xF7 -> SMALL TILDE - u'\xaf' # 0xF8 -> MACRON - u'\u02d8' # 0xF9 -> BREVE - u'\u02d9' # 0xFA -> DOT ABOVE - u'\u02da' # 0xFB -> RING ABOVE - u'\xb8' # 0xFC -> CEDILLA - u'\u02dd' # 0xFD -> DOUBLE ACUTE ACCENT - u'\u02db' # 0xFE -> OGONEK - u'\u02c7' # 0xFF -> CARON -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/mbcs.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,47 +0,0 @@ -""" Python 'mbcs' Codec for Windows - - -Cloned by Mark Hammond (mhammond@skippinet.com.au) from ascii.py, -which was written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -# Import them explicitly to cause an ImportError -# on non-Windows systems -from codecs import mbcs_encode, mbcs_decode -# for IncrementalDecoder, IncrementalEncoder, ... -import codecs - -### Codec APIs - -encode = mbcs_encode - -def decode(input, errors='strict'): - return mbcs_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return mbcs_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = mbcs_decode - -class StreamWriter(codecs.StreamWriter): - encode = mbcs_encode - -class StreamReader(codecs.StreamReader): - decode = mbcs_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='mbcs', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/palmos.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,83 +0,0 @@ -""" Python Character Mapping Codec for PalmOS 3.5. - -Written by Sjoerd Mullender (sjoerd@acm.org); based on iso8859_15.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_map)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='palmos', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) - -# The PalmOS character set is mostly iso-8859-1 with some differences. -decoding_map.update({ - 0x0080: 0x20ac, # EURO SIGN - 0x0082: 0x201a, # SINGLE LOW-9 QUOTATION MARK - 0x0083: 0x0192, # LATIN SMALL LETTER F WITH HOOK - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x2020, # DAGGER - 0x0087: 0x2021, # DOUBLE DAGGER - 0x0088: 0x02c6, # MODIFIER LETTER CIRCUMFLEX ACCENT - 0x0089: 0x2030, # PER MILLE SIGN - 0x008a: 0x0160, # LATIN CAPITAL LETTER S WITH CARON - 0x008b: 0x2039, # SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 0x008c: 0x0152, # LATIN CAPITAL LIGATURE OE - 0x008d: 0x2666, # BLACK DIAMOND SUIT - 0x008e: 0x2663, # BLACK CLUB SUIT - 0x008f: 0x2665, # BLACK HEART SUIT - 0x0090: 0x2660, # BLACK SPADE SUIT - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x02dc, # SMALL TILDE - 0x0099: 0x2122, # TRADE MARK SIGN - 0x009a: 0x0161, # LATIN SMALL LETTER S WITH CARON - 0x009c: 0x0153, # LATIN SMALL LIGATURE OE - 0x009f: 0x0178, # LATIN CAPITAL LETTER Y WITH DIAERESIS -}) - -### Encoding Map - -encoding_map = codecs.make_encoding_map(decoding_map)
--- a/test/lib/python2.7/encodings/ptcp154.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,175 +0,0 @@ -""" Python Character Mapping Codec generated from 'PTCP154.txt' with gencodec.py. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. -(c) Copyright 2000 Guido van Rossum. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_map)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='ptcp154', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0080: 0x0496, # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER - 0x0081: 0x0492, # CYRILLIC CAPITAL LETTER GHE WITH STROKE - 0x0082: 0x04ee, # CYRILLIC CAPITAL LETTER U WITH MACRON - 0x0083: 0x0493, # CYRILLIC SMALL LETTER GHE WITH STROKE - 0x0084: 0x201e, # DOUBLE LOW-9 QUOTATION MARK - 0x0085: 0x2026, # HORIZONTAL ELLIPSIS - 0x0086: 0x04b6, # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER - 0x0087: 0x04ae, # CYRILLIC CAPITAL LETTER STRAIGHT U - 0x0088: 0x04b2, # CYRILLIC CAPITAL LETTER HA WITH DESCENDER - 0x0089: 0x04af, # CYRILLIC SMALL LETTER STRAIGHT U - 0x008a: 0x04a0, # CYRILLIC CAPITAL LETTER BASHKIR KA - 0x008b: 0x04e2, # CYRILLIC CAPITAL LETTER I WITH MACRON - 0x008c: 0x04a2, # CYRILLIC CAPITAL LETTER EN WITH DESCENDER - 0x008d: 0x049a, # CYRILLIC CAPITAL LETTER KA WITH DESCENDER - 0x008e: 0x04ba, # CYRILLIC CAPITAL LETTER SHHA - 0x008f: 0x04b8, # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE - 0x0090: 0x0497, # CYRILLIC SMALL LETTER ZHE WITH DESCENDER - 0x0091: 0x2018, # LEFT SINGLE QUOTATION MARK - 0x0092: 0x2019, # RIGHT SINGLE QUOTATION MARK - 0x0093: 0x201c, # LEFT DOUBLE QUOTATION MARK - 0x0094: 0x201d, # RIGHT DOUBLE QUOTATION MARK - 0x0095: 0x2022, # BULLET - 0x0096: 0x2013, # EN DASH - 0x0097: 0x2014, # EM DASH - 0x0098: 0x04b3, # CYRILLIC SMALL LETTER HA WITH DESCENDER - 0x0099: 0x04b7, # CYRILLIC SMALL LETTER CHE WITH DESCENDER - 0x009a: 0x04a1, # CYRILLIC SMALL LETTER BASHKIR KA - 0x009b: 0x04e3, # CYRILLIC SMALL LETTER I WITH MACRON - 0x009c: 0x04a3, # CYRILLIC SMALL LETTER EN WITH DESCENDER - 0x009d: 0x049b, # CYRILLIC SMALL LETTER KA WITH DESCENDER - 0x009e: 0x04bb, # CYRILLIC SMALL LETTER SHHA - 0x009f: 0x04b9, # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE - 0x00a1: 0x040e, # CYRILLIC CAPITAL LETTER SHORT U (Byelorussian) - 0x00a2: 0x045e, # CYRILLIC SMALL LETTER SHORT U (Byelorussian) - 0x00a3: 0x0408, # CYRILLIC CAPITAL LETTER JE - 0x00a4: 0x04e8, # CYRILLIC CAPITAL LETTER BARRED O - 0x00a5: 0x0498, # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER - 0x00a6: 0x04b0, # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE - 0x00a8: 0x0401, # CYRILLIC CAPITAL LETTER IO - 0x00aa: 0x04d8, # CYRILLIC CAPITAL LETTER SCHWA - 0x00ad: 0x04ef, # CYRILLIC SMALL LETTER U WITH MACRON - 0x00af: 0x049c, # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE - 0x00b1: 0x04b1, # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE - 0x00b2: 0x0406, # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b3: 0x0456, # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I - 0x00b4: 0x0499, # CYRILLIC SMALL LETTER ZE WITH DESCENDER - 0x00b5: 0x04e9, # CYRILLIC SMALL LETTER BARRED O - 0x00b8: 0x0451, # CYRILLIC SMALL LETTER IO - 0x00b9: 0x2116, # NUMERO SIGN - 0x00ba: 0x04d9, # CYRILLIC SMALL LETTER SCHWA - 0x00bc: 0x0458, # CYRILLIC SMALL LETTER JE - 0x00bd: 0x04aa, # CYRILLIC CAPITAL LETTER ES WITH DESCENDER - 0x00be: 0x04ab, # CYRILLIC SMALL LETTER ES WITH DESCENDER - 0x00bf: 0x049d, # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE - 0x00c0: 0x0410, # CYRILLIC CAPITAL LETTER A - 0x00c1: 0x0411, # CYRILLIC CAPITAL LETTER BE - 0x00c2: 0x0412, # CYRILLIC CAPITAL LETTER VE - 0x00c3: 0x0413, # CYRILLIC CAPITAL LETTER GHE - 0x00c4: 0x0414, # CYRILLIC CAPITAL LETTER DE - 0x00c5: 0x0415, # CYRILLIC CAPITAL LETTER IE - 0x00c6: 0x0416, # CYRILLIC CAPITAL LETTER ZHE - 0x00c7: 0x0417, # CYRILLIC CAPITAL LETTER ZE - 0x00c8: 0x0418, # CYRILLIC CAPITAL LETTER I - 0x00c9: 0x0419, # CYRILLIC CAPITAL LETTER SHORT I - 0x00ca: 0x041a, # CYRILLIC CAPITAL LETTER KA - 0x00cb: 0x041b, # CYRILLIC CAPITAL LETTER EL - 0x00cc: 0x041c, # CYRILLIC CAPITAL LETTER EM - 0x00cd: 0x041d, # CYRILLIC CAPITAL LETTER EN - 0x00ce: 0x041e, # CYRILLIC CAPITAL LETTER O - 0x00cf: 0x041f, # CYRILLIC CAPITAL LETTER PE - 0x00d0: 0x0420, # CYRILLIC CAPITAL LETTER ER - 0x00d1: 0x0421, # CYRILLIC CAPITAL LETTER ES - 0x00d2: 0x0422, # CYRILLIC CAPITAL LETTER TE - 0x00d3: 0x0423, # CYRILLIC CAPITAL LETTER U - 0x00d4: 0x0424, # CYRILLIC CAPITAL LETTER EF - 0x00d5: 0x0425, # CYRILLIC CAPITAL LETTER HA - 0x00d6: 0x0426, # CYRILLIC CAPITAL LETTER TSE - 0x00d7: 0x0427, # CYRILLIC CAPITAL LETTER CHE - 0x00d8: 0x0428, # CYRILLIC CAPITAL LETTER SHA - 0x00d9: 0x0429, # CYRILLIC CAPITAL LETTER SHCHA - 0x00da: 0x042a, # CYRILLIC CAPITAL LETTER HARD SIGN - 0x00db: 0x042b, # CYRILLIC CAPITAL LETTER YERU - 0x00dc: 0x042c, # CYRILLIC CAPITAL LETTER SOFT SIGN - 0x00dd: 0x042d, # CYRILLIC CAPITAL LETTER E - 0x00de: 0x042e, # CYRILLIC CAPITAL LETTER YU - 0x00df: 0x042f, # CYRILLIC CAPITAL LETTER YA - 0x00e0: 0x0430, # CYRILLIC SMALL LETTER A - 0x00e1: 0x0431, # CYRILLIC SMALL LETTER BE - 0x00e2: 0x0432, # CYRILLIC SMALL LETTER VE - 0x00e3: 0x0433, # CYRILLIC SMALL LETTER GHE - 0x00e4: 0x0434, # CYRILLIC SMALL LETTER DE - 0x00e5: 0x0435, # CYRILLIC SMALL LETTER IE - 0x00e6: 0x0436, # CYRILLIC SMALL LETTER ZHE - 0x00e7: 0x0437, # CYRILLIC SMALL LETTER ZE - 0x00e8: 0x0438, # CYRILLIC SMALL LETTER I - 0x00e9: 0x0439, # CYRILLIC SMALL LETTER SHORT I - 0x00ea: 0x043a, # CYRILLIC SMALL LETTER KA - 0x00eb: 0x043b, # CYRILLIC SMALL LETTER EL - 0x00ec: 0x043c, # CYRILLIC SMALL LETTER EM - 0x00ed: 0x043d, # CYRILLIC SMALL LETTER EN - 0x00ee: 0x043e, # CYRILLIC SMALL LETTER O - 0x00ef: 0x043f, # CYRILLIC SMALL LETTER PE - 0x00f0: 0x0440, # CYRILLIC SMALL LETTER ER - 0x00f1: 0x0441, # CYRILLIC SMALL LETTER ES - 0x00f2: 0x0442, # CYRILLIC SMALL LETTER TE - 0x00f3: 0x0443, # CYRILLIC SMALL LETTER U - 0x00f4: 0x0444, # CYRILLIC SMALL LETTER EF - 0x00f5: 0x0445, # CYRILLIC SMALL LETTER HA - 0x00f6: 0x0446, # CYRILLIC SMALL LETTER TSE - 0x00f7: 0x0447, # CYRILLIC SMALL LETTER CHE - 0x00f8: 0x0448, # CYRILLIC SMALL LETTER SHA - 0x00f9: 0x0449, # CYRILLIC SMALL LETTER SHCHA - 0x00fa: 0x044a, # CYRILLIC SMALL LETTER HARD SIGN - 0x00fb: 0x044b, # CYRILLIC SMALL LETTER YERU - 0x00fc: 0x044c, # CYRILLIC SMALL LETTER SOFT SIGN - 0x00fd: 0x044d, # CYRILLIC SMALL LETTER E - 0x00fe: 0x044e, # CYRILLIC SMALL LETTER YU - 0x00ff: 0x044f, # CYRILLIC SMALL LETTER YA -}) - -### Encoding Map - -encoding_map = codecs.make_encoding_map(decoding_map)
--- a/test/lib/python2.7/encodings/punycode.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,238 +0,0 @@ -# -*- coding: iso-8859-1 -*- -""" Codec for the Punicode encoding, as specified in RFC 3492 - -Written by Martin v. Löwis. -""" - -import codecs - -##################### Encoding ##################################### - -def segregate(str): - """3.1 Basic code point segregation""" - base = [] - extended = {} - for c in str: - if ord(c) < 128: - base.append(c) - else: - extended[c] = 1 - extended = extended.keys() - extended.sort() - return "".join(base).encode("ascii"),extended - -def selective_len(str, max): - """Return the length of str, considering only characters below max.""" - res = 0 - for c in str: - if ord(c) < max: - res += 1 - return res - -def selective_find(str, char, index, pos): - """Return a pair (index, pos), indicating the next occurrence of - char in str. index is the position of the character considering - only ordinals up to and including char, and pos is the position in - the full string. index/pos is the starting position in the full - string.""" - - l = len(str) - while 1: - pos += 1 - if pos == l: - return (-1, -1) - c = str[pos] - if c == char: - return index+1, pos - elif c < char: - index += 1 - -def insertion_unsort(str, extended): - """3.2 Insertion unsort coding""" - oldchar = 0x80 - result = [] - oldindex = -1 - for c in extended: - index = pos = -1 - char = ord(c) - curlen = selective_len(str, char) - delta = (curlen+1) * (char - oldchar) - while 1: - index,pos = selective_find(str,c,index,pos) - if index == -1: - break - delta += index - oldindex - result.append(delta-1) - oldindex = index - delta = 0 - oldchar = char - - return result - -def T(j, bias): - # Punycode parameters: tmin = 1, tmax = 26, base = 36 - res = 36 * (j + 1) - bias - if res < 1: return 1 - if res > 26: return 26 - return res - -digits = "abcdefghijklmnopqrstuvwxyz0123456789" -def generate_generalized_integer(N, bias): - """3.3 Generalized variable-length integers""" - result = [] - j = 0 - while 1: - t = T(j, bias) - if N < t: - result.append(digits[N]) - return result - result.append(digits[t + ((N - t) % (36 - t))]) - N = (N - t) // (36 - t) - j += 1 - -def adapt(delta, first, numchars): - if first: - delta //= 700 - else: - delta //= 2 - delta += delta // numchars - # ((base - tmin) * tmax) // 2 == 455 - divisions = 0 - while delta > 455: - delta = delta // 35 # base - tmin - divisions += 36 - bias = divisions + (36 * delta // (delta + 38)) - return bias - - -def generate_integers(baselen, deltas): - """3.4 Bias adaptation""" - # Punycode parameters: initial bias = 72, damp = 700, skew = 38 - result = [] - bias = 72 - for points, delta in enumerate(deltas): - s = generate_generalized_integer(delta, bias) - result.extend(s) - bias = adapt(delta, points==0, baselen+points+1) - return "".join(result) - -def punycode_encode(text): - base, extended = segregate(text) - base = base.encode("ascii") - deltas = insertion_unsort(text, extended) - extended = generate_integers(len(base), deltas) - if base: - return base + "-" + extended - return extended - -##################### Decoding ##################################### - -def decode_generalized_number(extended, extpos, bias, errors): - """3.3 Generalized variable-length integers""" - result = 0 - w = 1 - j = 0 - while 1: - try: - char = ord(extended[extpos]) - except IndexError: - if errors == "strict": - raise UnicodeError, "incomplete punicode string" - return extpos + 1, None - extpos += 1 - if 0x41 <= char <= 0x5A: # A-Z - digit = char - 0x41 - elif 0x30 <= char <= 0x39: - digit = char - 22 # 0x30-26 - elif errors == "strict": - raise UnicodeError("Invalid extended code point '%s'" - % extended[extpos]) - else: - return extpos, None - t = T(j, bias) - result += digit * w - if digit < t: - return extpos, result - w = w * (36 - t) - j += 1 - - -def insertion_sort(base, extended, errors): - """3.2 Insertion unsort coding""" - char = 0x80 - pos = -1 - bias = 72 - extpos = 0 - while extpos < len(extended): - newpos, delta = decode_generalized_number(extended, extpos, - bias, errors) - if delta is None: - # There was an error in decoding. We can't continue because - # synchronization is lost. - return base - pos += delta+1 - char += pos // (len(base) + 1) - if char > 0x10FFFF: - if errors == "strict": - raise UnicodeError, ("Invalid character U+%x" % char) - char = ord('?') - pos = pos % (len(base) + 1) - base = base[:pos] + unichr(char) + base[pos:] - bias = adapt(delta, (extpos == 0), len(base)) - extpos = newpos - return base - -def punycode_decode(text, errors): - pos = text.rfind("-") - if pos == -1: - base = "" - extended = text - else: - base = text[:pos] - extended = text[pos+1:] - base = unicode(base, "ascii", errors) - extended = extended.upper() - return insertion_sort(base, extended, errors) - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - res = punycode_encode(input) - return res, len(input) - - def decode(self,input,errors='strict'): - if errors not in ('strict', 'replace', 'ignore'): - raise UnicodeError, "Unsupported error handling "+errors - res = punycode_decode(input, errors) - return res, len(input) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return punycode_encode(input) - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - if self.errors not in ('strict', 'replace', 'ignore'): - raise UnicodeError, "Unsupported error handling "+self.errors - return punycode_decode(input, self.errors) - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='punycode', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - )
--- a/test/lib/python2.7/encodings/quopri_codec.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,76 +0,0 @@ -"""Codec for quoted-printable encoding. - -Like base64 and rot13, this returns Python strings, not Unicode. -""" - -import codecs, quopri -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO - -def quopri_encode(input, errors='strict'): - """Encode the input, returning a tuple (output object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - # using str() because of cStringIO's Unicode undesired Unicode behavior. - f = StringIO(str(input)) - g = StringIO() - quopri.encode(f, g, quotetabs=True) - output = g.getvalue() - return (output, len(input)) - -def quopri_decode(input, errors='strict'): - """Decode the input, returning a tuple (output object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - f = StringIO(str(input)) - g = StringIO() - quopri.decode(f, g) - output = g.getvalue() - return (output, len(input)) - -class Codec(codecs.Codec): - - def encode(self, input,errors='strict'): - return quopri_encode(input,errors) - def decode(self, input,errors='strict'): - return quopri_decode(input,errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return quopri_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return quopri_decode(input, self.errors)[0] - -class StreamWriter(Codec, codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -# encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='quopri', - encode=quopri_encode, - decode=quopri_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - _is_text_encoding=False, - )
--- a/test/lib/python2.7/encodings/raw_unicode_escape.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -""" Python 'raw-unicode-escape' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. - encode = codecs.raw_unicode_escape_encode - decode = codecs.raw_unicode_escape_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.raw_unicode_escape_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.raw_unicode_escape_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='raw-unicode-escape', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - )
--- a/test/lib/python2.7/encodings/rot_13.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,119 +0,0 @@ -#! /usr/bin/python2.7 -""" Python Character Mapping Codec for ROT13. - - See http://ucsub.colorado.edu/~kominek/rot13/ for details. - - Written by Marc-Andre Lemburg (mal@lemburg.com). - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_map) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_map) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_map)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_map)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='rot-13', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - _is_text_encoding=False, - ) - -### Decoding Map - -decoding_map = codecs.make_identity_dict(range(256)) -decoding_map.update({ - 0x0041: 0x004e, - 0x0042: 0x004f, - 0x0043: 0x0050, - 0x0044: 0x0051, - 0x0045: 0x0052, - 0x0046: 0x0053, - 0x0047: 0x0054, - 0x0048: 0x0055, - 0x0049: 0x0056, - 0x004a: 0x0057, - 0x004b: 0x0058, - 0x004c: 0x0059, - 0x004d: 0x005a, - 0x004e: 0x0041, - 0x004f: 0x0042, - 0x0050: 0x0043, - 0x0051: 0x0044, - 0x0052: 0x0045, - 0x0053: 0x0046, - 0x0054: 0x0047, - 0x0055: 0x0048, - 0x0056: 0x0049, - 0x0057: 0x004a, - 0x0058: 0x004b, - 0x0059: 0x004c, - 0x005a: 0x004d, - 0x0061: 0x006e, - 0x0062: 0x006f, - 0x0063: 0x0070, - 0x0064: 0x0071, - 0x0065: 0x0072, - 0x0066: 0x0073, - 0x0067: 0x0074, - 0x0068: 0x0075, - 0x0069: 0x0076, - 0x006a: 0x0077, - 0x006b: 0x0078, - 0x006c: 0x0079, - 0x006d: 0x007a, - 0x006e: 0x0061, - 0x006f: 0x0062, - 0x0070: 0x0063, - 0x0071: 0x0064, - 0x0072: 0x0065, - 0x0073: 0x0066, - 0x0074: 0x0067, - 0x0075: 0x0068, - 0x0076: 0x0069, - 0x0077: 0x006a, - 0x0078: 0x006b, - 0x0079: 0x006c, - 0x007a: 0x006d, -}) - -### Encoding Map - -encoding_map = codecs.make_encoding_map(decoding_map) - -### Filter API - -def rot13(infile, outfile): - outfile.write(infile.read().encode('rot-13')) - -if __name__ == '__main__': - import sys - rot13(sys.stdin, sys.stdout)
--- a/test/lib/python2.7/encodings/shift_jis.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# shift_jis.py: Python Unicode Codec for SHIFT_JIS -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('shift_jis') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='shift_jis', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/shift_jis_2004.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# shift_jis_2004.py: Python Unicode Codec for SHIFT_JIS_2004 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('shift_jis_2004') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='shift_jis_2004', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/shift_jisx0213.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,39 +0,0 @@ -# -# shift_jisx0213.py: Python Unicode Codec for SHIFT_JISX0213 -# -# Written by Hye-Shik Chang <perky@FreeBSD.org> -# - -import _codecs_jp, codecs -import _multibytecodec as mbc - -codec = _codecs_jp.getcodec('shift_jisx0213') - -class Codec(codecs.Codec): - encode = codec.encode - decode = codec.decode - -class IncrementalEncoder(mbc.MultibyteIncrementalEncoder, - codecs.IncrementalEncoder): - codec = codec - -class IncrementalDecoder(mbc.MultibyteIncrementalDecoder, - codecs.IncrementalDecoder): - codec = codec - -class StreamReader(Codec, mbc.MultibyteStreamReader, codecs.StreamReader): - codec = codec - -class StreamWriter(Codec, mbc.MultibyteStreamWriter, codecs.StreamWriter): - codec = codec - -def getregentry(): - return codecs.CodecInfo( - name='shift_jisx0213', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/string_escape.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -# -*- coding: iso-8859-1 -*- -""" Python 'escape' Codec - - -Written by Martin v. Löwis (martin@v.loewis.de). - -""" -import codecs - -class Codec(codecs.Codec): - - encode = codecs.escape_encode - decode = codecs.escape_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.escape_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.escape_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -def getregentry(): - return codecs.CodecInfo( - name='string-escape', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - )
--- a/test/lib/python2.7/encodings/tis_620.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,307 +0,0 @@ -""" Python Character Mapping Codec tis_620 generated from 'python-mappings/TIS-620.TXT' with gencodec.py. - -"""#" - -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return codecs.charmap_encode(input,errors,encoding_table) - - def decode(self,input,errors='strict'): - return codecs.charmap_decode(input,errors,decoding_table) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.charmap_encode(input,self.errors,encoding_table)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.charmap_decode(input,self.errors,decoding_table)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='tis-620', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - ) - - -### Decoding Table - -decoding_table = ( - u'\x00' # 0x00 -> NULL - u'\x01' # 0x01 -> START OF HEADING - u'\x02' # 0x02 -> START OF TEXT - u'\x03' # 0x03 -> END OF TEXT - u'\x04' # 0x04 -> END OF TRANSMISSION - u'\x05' # 0x05 -> ENQUIRY - u'\x06' # 0x06 -> ACKNOWLEDGE - u'\x07' # 0x07 -> BELL - u'\x08' # 0x08 -> BACKSPACE - u'\t' # 0x09 -> HORIZONTAL TABULATION - u'\n' # 0x0A -> LINE FEED - u'\x0b' # 0x0B -> VERTICAL TABULATION - u'\x0c' # 0x0C -> FORM FEED - u'\r' # 0x0D -> CARRIAGE RETURN - u'\x0e' # 0x0E -> SHIFT OUT - u'\x0f' # 0x0F -> SHIFT IN - u'\x10' # 0x10 -> DATA LINK ESCAPE - u'\x11' # 0x11 -> DEVICE CONTROL ONE - u'\x12' # 0x12 -> DEVICE CONTROL TWO - u'\x13' # 0x13 -> DEVICE CONTROL THREE - u'\x14' # 0x14 -> DEVICE CONTROL FOUR - u'\x15' # 0x15 -> NEGATIVE ACKNOWLEDGE - u'\x16' # 0x16 -> SYNCHRONOUS IDLE - u'\x17' # 0x17 -> END OF TRANSMISSION BLOCK - u'\x18' # 0x18 -> CANCEL - u'\x19' # 0x19 -> END OF MEDIUM - u'\x1a' # 0x1A -> SUBSTITUTE - u'\x1b' # 0x1B -> ESCAPE - u'\x1c' # 0x1C -> FILE SEPARATOR - u'\x1d' # 0x1D -> GROUP SEPARATOR - u'\x1e' # 0x1E -> RECORD SEPARATOR - u'\x1f' # 0x1F -> UNIT SEPARATOR - u' ' # 0x20 -> SPACE - u'!' # 0x21 -> EXCLAMATION MARK - u'"' # 0x22 -> QUOTATION MARK - u'#' # 0x23 -> NUMBER SIGN - u'$' # 0x24 -> DOLLAR SIGN - u'%' # 0x25 -> PERCENT SIGN - u'&' # 0x26 -> AMPERSAND - u"'" # 0x27 -> APOSTROPHE - u'(' # 0x28 -> LEFT PARENTHESIS - u')' # 0x29 -> RIGHT PARENTHESIS - u'*' # 0x2A -> ASTERISK - u'+' # 0x2B -> PLUS SIGN - u',' # 0x2C -> COMMA - u'-' # 0x2D -> HYPHEN-MINUS - u'.' # 0x2E -> FULL STOP - u'/' # 0x2F -> SOLIDUS - u'0' # 0x30 -> DIGIT ZERO - u'1' # 0x31 -> DIGIT ONE - u'2' # 0x32 -> DIGIT TWO - u'3' # 0x33 -> DIGIT THREE - u'4' # 0x34 -> DIGIT FOUR - u'5' # 0x35 -> DIGIT FIVE - u'6' # 0x36 -> DIGIT SIX - u'7' # 0x37 -> DIGIT SEVEN - u'8' # 0x38 -> DIGIT EIGHT - u'9' # 0x39 -> DIGIT NINE - u':' # 0x3A -> COLON - u';' # 0x3B -> SEMICOLON - u'<' # 0x3C -> LESS-THAN SIGN - u'=' # 0x3D -> EQUALS SIGN - u'>' # 0x3E -> GREATER-THAN SIGN - u'?' # 0x3F -> QUESTION MARK - u'@' # 0x40 -> COMMERCIAL AT - u'A' # 0x41 -> LATIN CAPITAL LETTER A - u'B' # 0x42 -> LATIN CAPITAL LETTER B - u'C' # 0x43 -> LATIN CAPITAL LETTER C - u'D' # 0x44 -> LATIN CAPITAL LETTER D - u'E' # 0x45 -> LATIN CAPITAL LETTER E - u'F' # 0x46 -> LATIN CAPITAL LETTER F - u'G' # 0x47 -> LATIN CAPITAL LETTER G - u'H' # 0x48 -> LATIN CAPITAL LETTER H - u'I' # 0x49 -> LATIN CAPITAL LETTER I - u'J' # 0x4A -> LATIN CAPITAL LETTER J - u'K' # 0x4B -> LATIN CAPITAL LETTER K - u'L' # 0x4C -> LATIN CAPITAL LETTER L - u'M' # 0x4D -> LATIN CAPITAL LETTER M - u'N' # 0x4E -> LATIN CAPITAL LETTER N - u'O' # 0x4F -> LATIN CAPITAL LETTER O - u'P' # 0x50 -> LATIN CAPITAL LETTER P - u'Q' # 0x51 -> LATIN CAPITAL LETTER Q - u'R' # 0x52 -> LATIN CAPITAL LETTER R - u'S' # 0x53 -> LATIN CAPITAL LETTER S - u'T' # 0x54 -> LATIN CAPITAL LETTER T - u'U' # 0x55 -> LATIN CAPITAL LETTER U - u'V' # 0x56 -> LATIN CAPITAL LETTER V - u'W' # 0x57 -> LATIN CAPITAL LETTER W - u'X' # 0x58 -> LATIN CAPITAL LETTER X - u'Y' # 0x59 -> LATIN CAPITAL LETTER Y - u'Z' # 0x5A -> LATIN CAPITAL LETTER Z - u'[' # 0x5B -> LEFT SQUARE BRACKET - u'\\' # 0x5C -> REVERSE SOLIDUS - u']' # 0x5D -> RIGHT SQUARE BRACKET - u'^' # 0x5E -> CIRCUMFLEX ACCENT - u'_' # 0x5F -> LOW LINE - u'`' # 0x60 -> GRAVE ACCENT - u'a' # 0x61 -> LATIN SMALL LETTER A - u'b' # 0x62 -> LATIN SMALL LETTER B - u'c' # 0x63 -> LATIN SMALL LETTER C - u'd' # 0x64 -> LATIN SMALL LETTER D - u'e' # 0x65 -> LATIN SMALL LETTER E - u'f' # 0x66 -> LATIN SMALL LETTER F - u'g' # 0x67 -> LATIN SMALL LETTER G - u'h' # 0x68 -> LATIN SMALL LETTER H - u'i' # 0x69 -> LATIN SMALL LETTER I - u'j' # 0x6A -> LATIN SMALL LETTER J - u'k' # 0x6B -> LATIN SMALL LETTER K - u'l' # 0x6C -> LATIN SMALL LETTER L - u'm' # 0x6D -> LATIN SMALL LETTER M - u'n' # 0x6E -> LATIN SMALL LETTER N - u'o' # 0x6F -> LATIN SMALL LETTER O - u'p' # 0x70 -> LATIN SMALL LETTER P - u'q' # 0x71 -> LATIN SMALL LETTER Q - u'r' # 0x72 -> LATIN SMALL LETTER R - u's' # 0x73 -> LATIN SMALL LETTER S - u't' # 0x74 -> LATIN SMALL LETTER T - u'u' # 0x75 -> LATIN SMALL LETTER U - u'v' # 0x76 -> LATIN SMALL LETTER V - u'w' # 0x77 -> LATIN SMALL LETTER W - u'x' # 0x78 -> LATIN SMALL LETTER X - u'y' # 0x79 -> LATIN SMALL LETTER Y - u'z' # 0x7A -> LATIN SMALL LETTER Z - u'{' # 0x7B -> LEFT CURLY BRACKET - u'|' # 0x7C -> VERTICAL LINE - u'}' # 0x7D -> RIGHT CURLY BRACKET - u'~' # 0x7E -> TILDE - u'\x7f' # 0x7F -> DELETE - u'\x80' # 0x80 -> <control> - u'\x81' # 0x81 -> <control> - u'\x82' # 0x82 -> <control> - u'\x83' # 0x83 -> <control> - u'\x84' # 0x84 -> <control> - u'\x85' # 0x85 -> <control> - u'\x86' # 0x86 -> <control> - u'\x87' # 0x87 -> <control> - u'\x88' # 0x88 -> <control> - u'\x89' # 0x89 -> <control> - u'\x8a' # 0x8A -> <control> - u'\x8b' # 0x8B -> <control> - u'\x8c' # 0x8C -> <control> - u'\x8d' # 0x8D -> <control> - u'\x8e' # 0x8E -> <control> - u'\x8f' # 0x8F -> <control> - u'\x90' # 0x90 -> <control> - u'\x91' # 0x91 -> <control> - u'\x92' # 0x92 -> <control> - u'\x93' # 0x93 -> <control> - u'\x94' # 0x94 -> <control> - u'\x95' # 0x95 -> <control> - u'\x96' # 0x96 -> <control> - u'\x97' # 0x97 -> <control> - u'\x98' # 0x98 -> <control> - u'\x99' # 0x99 -> <control> - u'\x9a' # 0x9A -> <control> - u'\x9b' # 0x9B -> <control> - u'\x9c' # 0x9C -> <control> - u'\x9d' # 0x9D -> <control> - u'\x9e' # 0x9E -> <control> - u'\x9f' # 0x9F -> <control> - u'\ufffe' - u'\u0e01' # 0xA1 -> THAI CHARACTER KO KAI - u'\u0e02' # 0xA2 -> THAI CHARACTER KHO KHAI - u'\u0e03' # 0xA3 -> THAI CHARACTER KHO KHUAT - u'\u0e04' # 0xA4 -> THAI CHARACTER KHO KHWAI - u'\u0e05' # 0xA5 -> THAI CHARACTER KHO KHON - u'\u0e06' # 0xA6 -> THAI CHARACTER KHO RAKHANG - u'\u0e07' # 0xA7 -> THAI CHARACTER NGO NGU - u'\u0e08' # 0xA8 -> THAI CHARACTER CHO CHAN - u'\u0e09' # 0xA9 -> THAI CHARACTER CHO CHING - u'\u0e0a' # 0xAA -> THAI CHARACTER CHO CHANG - u'\u0e0b' # 0xAB -> THAI CHARACTER SO SO - u'\u0e0c' # 0xAC -> THAI CHARACTER CHO CHOE - u'\u0e0d' # 0xAD -> THAI CHARACTER YO YING - u'\u0e0e' # 0xAE -> THAI CHARACTER DO CHADA - u'\u0e0f' # 0xAF -> THAI CHARACTER TO PATAK - u'\u0e10' # 0xB0 -> THAI CHARACTER THO THAN - u'\u0e11' # 0xB1 -> THAI CHARACTER THO NANGMONTHO - u'\u0e12' # 0xB2 -> THAI CHARACTER THO PHUTHAO - u'\u0e13' # 0xB3 -> THAI CHARACTER NO NEN - u'\u0e14' # 0xB4 -> THAI CHARACTER DO DEK - u'\u0e15' # 0xB5 -> THAI CHARACTER TO TAO - u'\u0e16' # 0xB6 -> THAI CHARACTER THO THUNG - u'\u0e17' # 0xB7 -> THAI CHARACTER THO THAHAN - u'\u0e18' # 0xB8 -> THAI CHARACTER THO THONG - u'\u0e19' # 0xB9 -> THAI CHARACTER NO NU - u'\u0e1a' # 0xBA -> THAI CHARACTER BO BAIMAI - u'\u0e1b' # 0xBB -> THAI CHARACTER PO PLA - u'\u0e1c' # 0xBC -> THAI CHARACTER PHO PHUNG - u'\u0e1d' # 0xBD -> THAI CHARACTER FO FA - u'\u0e1e' # 0xBE -> THAI CHARACTER PHO PHAN - u'\u0e1f' # 0xBF -> THAI CHARACTER FO FAN - u'\u0e20' # 0xC0 -> THAI CHARACTER PHO SAMPHAO - u'\u0e21' # 0xC1 -> THAI CHARACTER MO MA - u'\u0e22' # 0xC2 -> THAI CHARACTER YO YAK - u'\u0e23' # 0xC3 -> THAI CHARACTER RO RUA - u'\u0e24' # 0xC4 -> THAI CHARACTER RU - u'\u0e25' # 0xC5 -> THAI CHARACTER LO LING - u'\u0e26' # 0xC6 -> THAI CHARACTER LU - u'\u0e27' # 0xC7 -> THAI CHARACTER WO WAEN - u'\u0e28' # 0xC8 -> THAI CHARACTER SO SALA - u'\u0e29' # 0xC9 -> THAI CHARACTER SO RUSI - u'\u0e2a' # 0xCA -> THAI CHARACTER SO SUA - u'\u0e2b' # 0xCB -> THAI CHARACTER HO HIP - u'\u0e2c' # 0xCC -> THAI CHARACTER LO CHULA - u'\u0e2d' # 0xCD -> THAI CHARACTER O ANG - u'\u0e2e' # 0xCE -> THAI CHARACTER HO NOKHUK - u'\u0e2f' # 0xCF -> THAI CHARACTER PAIYANNOI - u'\u0e30' # 0xD0 -> THAI CHARACTER SARA A - u'\u0e31' # 0xD1 -> THAI CHARACTER MAI HAN-AKAT - u'\u0e32' # 0xD2 -> THAI CHARACTER SARA AA - u'\u0e33' # 0xD3 -> THAI CHARACTER SARA AM - u'\u0e34' # 0xD4 -> THAI CHARACTER SARA I - u'\u0e35' # 0xD5 -> THAI CHARACTER SARA II - u'\u0e36' # 0xD6 -> THAI CHARACTER SARA UE - u'\u0e37' # 0xD7 -> THAI CHARACTER SARA UEE - u'\u0e38' # 0xD8 -> THAI CHARACTER SARA U - u'\u0e39' # 0xD9 -> THAI CHARACTER SARA UU - u'\u0e3a' # 0xDA -> THAI CHARACTER PHINTHU - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\u0e3f' # 0xDF -> THAI CURRENCY SYMBOL BAHT - u'\u0e40' # 0xE0 -> THAI CHARACTER SARA E - u'\u0e41' # 0xE1 -> THAI CHARACTER SARA AE - u'\u0e42' # 0xE2 -> THAI CHARACTER SARA O - u'\u0e43' # 0xE3 -> THAI CHARACTER SARA AI MAIMUAN - u'\u0e44' # 0xE4 -> THAI CHARACTER SARA AI MAIMALAI - u'\u0e45' # 0xE5 -> THAI CHARACTER LAKKHANGYAO - u'\u0e46' # 0xE6 -> THAI CHARACTER MAIYAMOK - u'\u0e47' # 0xE7 -> THAI CHARACTER MAITAIKHU - u'\u0e48' # 0xE8 -> THAI CHARACTER MAI EK - u'\u0e49' # 0xE9 -> THAI CHARACTER MAI THO - u'\u0e4a' # 0xEA -> THAI CHARACTER MAI TRI - u'\u0e4b' # 0xEB -> THAI CHARACTER MAI CHATTAWA - u'\u0e4c' # 0xEC -> THAI CHARACTER THANTHAKHAT - u'\u0e4d' # 0xED -> THAI CHARACTER NIKHAHIT - u'\u0e4e' # 0xEE -> THAI CHARACTER YAMAKKAN - u'\u0e4f' # 0xEF -> THAI CHARACTER FONGMAN - u'\u0e50' # 0xF0 -> THAI DIGIT ZERO - u'\u0e51' # 0xF1 -> THAI DIGIT ONE - u'\u0e52' # 0xF2 -> THAI DIGIT TWO - u'\u0e53' # 0xF3 -> THAI DIGIT THREE - u'\u0e54' # 0xF4 -> THAI DIGIT FOUR - u'\u0e55' # 0xF5 -> THAI DIGIT FIVE - u'\u0e56' # 0xF6 -> THAI DIGIT SIX - u'\u0e57' # 0xF7 -> THAI DIGIT SEVEN - u'\u0e58' # 0xF8 -> THAI DIGIT EIGHT - u'\u0e59' # 0xF9 -> THAI DIGIT NINE - u'\u0e5a' # 0xFA -> THAI CHARACTER ANGKHANKHU - u'\u0e5b' # 0xFB -> THAI CHARACTER KHOMUT - u'\ufffe' - u'\ufffe' - u'\ufffe' - u'\ufffe' -) - -### Encoding table -encoding_table=codecs.charmap_build(decoding_table)
--- a/test/lib/python2.7/encodings/undefined.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,49 +0,0 @@ -""" Python 'undefined' Codec - - This codec will always raise a ValueError exception when being - used. It is intended for use by the site.py file to switch off - automatic string to Unicode coercion. - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - raise UnicodeError("undefined encoding") - - def decode(self,input,errors='strict'): - raise UnicodeError("undefined encoding") - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - raise UnicodeError("undefined encoding") - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - raise UnicodeError("undefined encoding") - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='undefined', - encode=Codec().encode, - decode=Codec().decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - )
--- a/test/lib/python2.7/encodings/unicode_escape.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -""" Python 'unicode-escape' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. - encode = codecs.unicode_escape_encode - decode = codecs.unicode_escape_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.unicode_escape_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.unicode_escape_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='unicode-escape', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - )
--- a/test/lib/python2.7/encodings/unicode_internal.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,45 +0,0 @@ -""" Python 'unicode-internal' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -class Codec(codecs.Codec): - - # Note: Binding these as C functions will result in the class not - # converting them to methods. This is intended. - encode = codecs.unicode_internal_encode - decode = codecs.unicode_internal_decode - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.unicode_internal_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return codecs.unicode_internal_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='unicode-internal', - encode=Codec.encode, - decode=Codec.decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamwriter=StreamWriter, - streamreader=StreamReader, - )
--- a/test/lib/python2.7/encodings/utf_16.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,126 +0,0 @@ -""" Python 'utf-16' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs, sys - -### Codec APIs - -encode = codecs.utf_16_encode - -def decode(input, errors='strict'): - return codecs.utf_16_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict'): - codecs.IncrementalEncoder.__init__(self, errors) - self.encoder = None - - def encode(self, input, final=False): - if self.encoder is None: - result = codecs.utf_16_encode(input, self.errors)[0] - if sys.byteorder == 'little': - self.encoder = codecs.utf_16_le_encode - else: - self.encoder = codecs.utf_16_be_encode - return result - return self.encoder(input, self.errors)[0] - - def reset(self): - codecs.IncrementalEncoder.reset(self) - self.encoder = None - - def getstate(self): - # state info we return to the caller: - # 0: stream is in natural order for this platform - # 2: endianness hasn't been determined yet - # (we're never writing in unnatural order) - return (2 if self.encoder is None else 0) - - def setstate(self, state): - if state: - self.encoder = None - else: - if sys.byteorder == 'little': - self.encoder = codecs.utf_16_le_encode - else: - self.encoder = codecs.utf_16_be_encode - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def __init__(self, errors='strict'): - codecs.BufferedIncrementalDecoder.__init__(self, errors) - self.decoder = None - - def _buffer_decode(self, input, errors, final): - if self.decoder is None: - (output, consumed, byteorder) = \ - codecs.utf_16_ex_decode(input, errors, 0, final) - if byteorder == -1: - self.decoder = codecs.utf_16_le_decode - elif byteorder == 1: - self.decoder = codecs.utf_16_be_decode - elif consumed >= 2: - raise UnicodeError("UTF-16 stream does not start with BOM") - return (output, consumed) - return self.decoder(input, self.errors, final) - - def reset(self): - codecs.BufferedIncrementalDecoder.reset(self) - self.decoder = None - -class StreamWriter(codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - codecs.StreamWriter.__init__(self, stream, errors) - self.encoder = None - - def reset(self): - codecs.StreamWriter.reset(self) - self.encoder = None - - def encode(self, input, errors='strict'): - if self.encoder is None: - result = codecs.utf_16_encode(input, errors) - if sys.byteorder == 'little': - self.encoder = codecs.utf_16_le_encode - else: - self.encoder = codecs.utf_16_be_encode - return result - else: - return self.encoder(input, errors) - -class StreamReader(codecs.StreamReader): - - def reset(self): - codecs.StreamReader.reset(self) - try: - del self.decode - except AttributeError: - pass - - def decode(self, input, errors='strict'): - (object, consumed, byteorder) = \ - codecs.utf_16_ex_decode(input, errors, 0, False) - if byteorder == -1: - self.decode = codecs.utf_16_le_decode - elif byteorder == 1: - self.decode = codecs.utf_16_be_decode - elif consumed>=2: - raise UnicodeError,"UTF-16 stream does not start with BOM" - return (object, consumed) - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-16', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/utf_16_be.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -""" Python 'utf-16-be' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -encode = codecs.utf_16_be_encode - -def decode(input, errors='strict'): - return codecs.utf_16_be_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_16_be_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_16_be_decode - -class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_16_be_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_16_be_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-16-be', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/utf_16_le.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -""" Python 'utf-16-le' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -encode = codecs.utf_16_le_encode - -def decode(input, errors='strict'): - return codecs.utf_16_le_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_16_le_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_16_le_decode - -class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_16_le_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_16_le_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-16-le', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/utf_32.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,150 +0,0 @@ -""" -Python 'utf-32' Codec -""" -import codecs, sys - -### Codec APIs - -encode = codecs.utf_32_encode - -def decode(input, errors='strict'): - return codecs.utf_32_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict'): - codecs.IncrementalEncoder.__init__(self, errors) - self.encoder = None - - def encode(self, input, final=False): - if self.encoder is None: - result = codecs.utf_32_encode(input, self.errors)[0] - if sys.byteorder == 'little': - self.encoder = codecs.utf_32_le_encode - else: - self.encoder = codecs.utf_32_be_encode - return result - return self.encoder(input, self.errors)[0] - - def reset(self): - codecs.IncrementalEncoder.reset(self) - self.encoder = None - - def getstate(self): - # state info we return to the caller: - # 0: stream is in natural order for this platform - # 2: endianness hasn't been determined yet - # (we're never writing in unnatural order) - return (2 if self.encoder is None else 0) - - def setstate(self, state): - if state: - self.encoder = None - else: - if sys.byteorder == 'little': - self.encoder = codecs.utf_32_le_encode - else: - self.encoder = codecs.utf_32_be_encode - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def __init__(self, errors='strict'): - codecs.BufferedIncrementalDecoder.__init__(self, errors) - self.decoder = None - - def _buffer_decode(self, input, errors, final): - if self.decoder is None: - (output, consumed, byteorder) = \ - codecs.utf_32_ex_decode(input, errors, 0, final) - if byteorder == -1: - self.decoder = codecs.utf_32_le_decode - elif byteorder == 1: - self.decoder = codecs.utf_32_be_decode - elif consumed >= 4: - raise UnicodeError("UTF-32 stream does not start with BOM") - return (output, consumed) - return self.decoder(input, self.errors, final) - - def reset(self): - codecs.BufferedIncrementalDecoder.reset(self) - self.decoder = None - - def getstate(self): - # additional state info from the base class must be None here, - # as it isn't passed along to the caller - state = codecs.BufferedIncrementalDecoder.getstate(self)[0] - # additional state info we pass to the caller: - # 0: stream is in natural order for this platform - # 1: stream is in unnatural order - # 2: endianness hasn't been determined yet - if self.decoder is None: - return (state, 2) - addstate = int((sys.byteorder == "big") != - (self.decoder is codecs.utf_32_be_decode)) - return (state, addstate) - - def setstate(self, state): - # state[1] will be ignored by BufferedIncrementalDecoder.setstate() - codecs.BufferedIncrementalDecoder.setstate(self, state) - state = state[1] - if state == 0: - self.decoder = (codecs.utf_32_be_decode - if sys.byteorder == "big" - else codecs.utf_32_le_decode) - elif state == 1: - self.decoder = (codecs.utf_32_le_decode - if sys.byteorder == "big" - else codecs.utf_32_be_decode) - else: - self.decoder = None - -class StreamWriter(codecs.StreamWriter): - def __init__(self, stream, errors='strict'): - self.encoder = None - codecs.StreamWriter.__init__(self, stream, errors) - - def reset(self): - codecs.StreamWriter.reset(self) - self.encoder = None - - def encode(self, input, errors='strict'): - if self.encoder is None: - result = codecs.utf_32_encode(input, errors) - if sys.byteorder == 'little': - self.encoder = codecs.utf_32_le_encode - else: - self.encoder = codecs.utf_32_be_encode - return result - else: - return self.encoder(input, errors) - -class StreamReader(codecs.StreamReader): - - def reset(self): - codecs.StreamReader.reset(self) - try: - del self.decode - except AttributeError: - pass - - def decode(self, input, errors='strict'): - (object, consumed, byteorder) = \ - codecs.utf_32_ex_decode(input, errors, 0, False) - if byteorder == -1: - self.decode = codecs.utf_32_le_decode - elif byteorder == 1: - self.decode = codecs.utf_32_be_decode - elif consumed>=4: - raise UnicodeError,"UTF-32 stream does not start with BOM" - return (object, consumed) - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-32', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/utf_32_be.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -""" -Python 'utf-32-be' Codec -""" -import codecs - -### Codec APIs - -encode = codecs.utf_32_be_encode - -def decode(input, errors='strict'): - return codecs.utf_32_be_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_32_be_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_32_be_decode - -class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_32_be_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_32_be_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-32-be', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/utf_32_le.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ -""" -Python 'utf-32-le' Codec -""" -import codecs - -### Codec APIs - -encode = codecs.utf_32_le_encode - -def decode(input, errors='strict'): - return codecs.utf_32_le_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_32_le_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_32_le_decode - -class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_32_le_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_32_le_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-32-le', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/utf_7.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -""" Python 'utf-7' Codec - -Written by Brian Quinlan (brian@sweetapp.com). -""" -import codecs - -### Codec APIs - -encode = codecs.utf_7_encode - -def decode(input, errors='strict'): - return codecs.utf_7_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_7_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_7_decode - -class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_7_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_7_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-7', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/utf_8.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,42 +0,0 @@ -""" Python 'utf-8' Codec - - -Written by Marc-Andre Lemburg (mal@lemburg.com). - -(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. - -""" -import codecs - -### Codec APIs - -encode = codecs.utf_8_encode - -def decode(input, errors='strict'): - return codecs.utf_8_decode(input, errors, True) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return codecs.utf_8_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - _buffer_decode = codecs.utf_8_decode - -class StreamWriter(codecs.StreamWriter): - encode = codecs.utf_8_encode - -class StreamReader(codecs.StreamReader): - decode = codecs.utf_8_decode - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-8', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/utf_8_sig.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,117 +0,0 @@ -""" Python 'utf-8-sig' Codec -This work similar to UTF-8 with the following changes: - -* On encoding/writing a UTF-8 encoded BOM will be prepended/written as the - first three bytes. - -* On decoding/reading if the first three bytes are a UTF-8 encoded BOM, these - bytes will be skipped. -""" -import codecs - -### Codec APIs - -def encode(input, errors='strict'): - return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input)) - -def decode(input, errors='strict'): - prefix = 0 - if input[:3] == codecs.BOM_UTF8: - input = input[3:] - prefix = 3 - (output, consumed) = codecs.utf_8_decode(input, errors, True) - return (output, consumed+prefix) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict'): - codecs.IncrementalEncoder.__init__(self, errors) - self.first = 1 - - def encode(self, input, final=False): - if self.first: - self.first = 0 - return codecs.BOM_UTF8 + codecs.utf_8_encode(input, self.errors)[0] - else: - return codecs.utf_8_encode(input, self.errors)[0] - - def reset(self): - codecs.IncrementalEncoder.reset(self) - self.first = 1 - - def getstate(self): - return self.first - - def setstate(self, state): - self.first = state - -class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def __init__(self, errors='strict'): - codecs.BufferedIncrementalDecoder.__init__(self, errors) - self.first = True - - def _buffer_decode(self, input, errors, final): - if self.first: - if len(input) < 3: - if codecs.BOM_UTF8.startswith(input): - # not enough data to decide if this really is a BOM - # => try again on the next call - return (u"", 0) - else: - self.first = None - else: - self.first = None - if input[:3] == codecs.BOM_UTF8: - (output, consumed) = codecs.utf_8_decode(input[3:], errors, final) - return (output, consumed+3) - return codecs.utf_8_decode(input, errors, final) - - def reset(self): - codecs.BufferedIncrementalDecoder.reset(self) - self.first = True - -class StreamWriter(codecs.StreamWriter): - def reset(self): - codecs.StreamWriter.reset(self) - try: - del self.encode - except AttributeError: - pass - - def encode(self, input, errors='strict'): - self.encode = codecs.utf_8_encode - return encode(input, errors) - -class StreamReader(codecs.StreamReader): - def reset(self): - codecs.StreamReader.reset(self) - try: - del self.decode - except AttributeError: - pass - - def decode(self, input, errors='strict'): - if len(input) < 3: - if codecs.BOM_UTF8.startswith(input): - # not enough data to decide if this is a BOM - # => try again on the next call - return (u"", 0) - elif input[:3] == codecs.BOM_UTF8: - self.decode = codecs.utf_8_decode - (output, consumed) = codecs.utf_8_decode(input[3:],errors) - return (output, consumed+3) - # (else) no BOM present - self.decode = codecs.utf_8_decode - return codecs.utf_8_decode(input, errors) - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='utf-8-sig', - encode=encode, - decode=decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - )
--- a/test/lib/python2.7/encodings/uu_codec.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,130 +0,0 @@ -""" Python 'uu_codec' Codec - UU content transfer encoding - - Unlike most of the other codecs which target Unicode, this codec - will return Python string objects for both encode and decode. - - Written by Marc-Andre Lemburg (mal@lemburg.com). Some details were - adapted from uu.py which was written by Lance Ellinghouse and - modified by Jack Jansen and Fredrik Lundh. - -""" -import codecs, binascii - -### Codec APIs - -def uu_encode(input,errors='strict',filename='<data>',mode=0666): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - from cStringIO import StringIO - from binascii import b2a_uu - # using str() because of cStringIO's Unicode undesired Unicode behavior. - infile = StringIO(str(input)) - outfile = StringIO() - read = infile.read - write = outfile.write - - # Encode - write('begin %o %s\n' % (mode & 0777, filename)) - chunk = read(45) - while chunk: - write(b2a_uu(chunk)) - chunk = read(45) - write(' \nend\n') - - return (outfile.getvalue(), len(input)) - -def uu_decode(input,errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - Note: filename and file mode information in the input data is - ignored. - - """ - assert errors == 'strict' - from cStringIO import StringIO - from binascii import a2b_uu - infile = StringIO(str(input)) - outfile = StringIO() - readline = infile.readline - write = outfile.write - - # Find start of encoded data - while 1: - s = readline() - if not s: - raise ValueError, 'Missing "begin" line in input data' - if s[:5] == 'begin': - break - - # Decode - while 1: - s = readline() - if not s or \ - s == 'end\n': - break - try: - data = a2b_uu(s) - except binascii.Error, v: - # Workaround for broken uuencoders by /Fredrik Lundh - nbytes = (((ord(s[0])-32) & 63) * 4 + 5) // 3 - data = a2b_uu(s[:nbytes]) - #sys.stderr.write("Warning: %s\n" % str(v)) - write(data) - if not s: - raise ValueError, 'Truncated input data' - - return (outfile.getvalue(), len(input)) - -class Codec(codecs.Codec): - - def encode(self,input,errors='strict'): - return uu_encode(input,errors) - - def decode(self,input,errors='strict'): - return uu_decode(input,errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def encode(self, input, final=False): - return uu_encode(input, self.errors)[0] - -class IncrementalDecoder(codecs.IncrementalDecoder): - def decode(self, input, final=False): - return uu_decode(input, self.errors)[0] - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='uu', - encode=uu_encode, - decode=uu_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - _is_text_encoding=False, - )
--- a/test/lib/python2.7/encodings/zlib_codec.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -""" Python 'zlib_codec' Codec - zlib compression encoding - - Unlike most of the other codecs which target Unicode, this codec - will return Python string objects for both encode and decode. - - Written by Marc-Andre Lemburg (mal@lemburg.com). - -""" -import codecs -import zlib # this codec needs the optional zlib module ! - -### Codec APIs - -def zlib_encode(input,errors='strict'): - - """ Encodes the object input and returns a tuple (output - object, length consumed). - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = zlib.compress(input) - return (output, len(input)) - -def zlib_decode(input,errors='strict'): - - """ Decodes the object input and returns a tuple (output - object, length consumed). - - input must be an object which provides the bf_getreadbuf - buffer slot. Python strings, buffer objects and memory - mapped files are examples of objects providing this slot. - - errors defines the error handling to apply. It defaults to - 'strict' handling which is the only currently supported - error handling for this codec. - - """ - assert errors == 'strict' - output = zlib.decompress(input) - return (output, len(input)) - -class Codec(codecs.Codec): - - def encode(self, input, errors='strict'): - return zlib_encode(input, errors) - def decode(self, input, errors='strict'): - return zlib_decode(input, errors) - -class IncrementalEncoder(codecs.IncrementalEncoder): - def __init__(self, errors='strict'): - assert errors == 'strict' - self.errors = errors - self.compressobj = zlib.compressobj() - - def encode(self, input, final=False): - if final: - c = self.compressobj.compress(input) - return c + self.compressobj.flush() - else: - return self.compressobj.compress(input) - - def reset(self): - self.compressobj = zlib.compressobj() - -class IncrementalDecoder(codecs.IncrementalDecoder): - def __init__(self, errors='strict'): - assert errors == 'strict' - self.errors = errors - self.decompressobj = zlib.decompressobj() - - def decode(self, input, final=False): - if final: - c = self.decompressobj.decompress(input) - return c + self.decompressobj.flush() - else: - return self.decompressobj.decompress(input) - - def reset(self): - self.decompressobj = zlib.decompressobj() - -class StreamWriter(Codec,codecs.StreamWriter): - pass - -class StreamReader(Codec,codecs.StreamReader): - pass - -### encodings module API - -def getregentry(): - return codecs.CodecInfo( - name='zlib', - encode=zlib_encode, - decode=zlib_decode, - incrementalencoder=IncrementalEncoder, - incrementaldecoder=IncrementalDecoder, - streamreader=StreamReader, - streamwriter=StreamWriter, - _is_text_encoding=False, - )
--- a/test/lib/python2.7/fnmatch.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,120 +0,0 @@ -"""Filename matching with shell patterns. - -fnmatch(FILENAME, PATTERN) matches according to the local convention. -fnmatchcase(FILENAME, PATTERN) always takes case in account. - -The functions operate by translating the pattern into a regular -expression. They cache the compiled regular expressions for speed. - -The function translate(PATTERN) returns a regular expression -corresponding to PATTERN. (It does not compile it.) -""" - -import re - -__all__ = ["filter", "fnmatch", "fnmatchcase", "translate"] - -_cache = {} -_MAXCACHE = 100 - -def _purge(): - """Clear the pattern cache""" - _cache.clear() - -def fnmatch(name, pat): - """Test whether FILENAME matches PATTERN. - - Patterns are Unix shell style: - - * matches everything - ? matches any single character - [seq] matches any character in seq - [!seq] matches any char not in seq - - An initial period in FILENAME is not special. - Both FILENAME and PATTERN are first case-normalized - if the operating system requires it. - If you don't want this, use fnmatchcase(FILENAME, PATTERN). - """ - - import os - name = os.path.normcase(name) - pat = os.path.normcase(pat) - return fnmatchcase(name, pat) - -def filter(names, pat): - """Return the subset of the list NAMES that match PAT""" - import os,posixpath - result=[] - pat=os.path.normcase(pat) - try: - re_pat = _cache[pat] - except KeyError: - res = translate(pat) - if len(_cache) >= _MAXCACHE: - _cache.clear() - _cache[pat] = re_pat = re.compile(res) - match = re_pat.match - if os.path is posixpath: - # normcase on posix is NOP. Optimize it away from the loop. - for name in names: - if match(name): - result.append(name) - else: - for name in names: - if match(os.path.normcase(name)): - result.append(name) - return result - -def fnmatchcase(name, pat): - """Test whether FILENAME matches PATTERN, including case. - - This is a version of fnmatch() which doesn't case-normalize - its arguments. - """ - - try: - re_pat = _cache[pat] - except KeyError: - res = translate(pat) - if len(_cache) >= _MAXCACHE: - _cache.clear() - _cache[pat] = re_pat = re.compile(res) - return re_pat.match(name) is not None - -def translate(pat): - """Translate a shell PATTERN to a regular expression. - - There is no way to quote meta-characters. - """ - - i, n = 0, len(pat) - res = '' - while i < n: - c = pat[i] - i = i+1 - if c == '*': - res = res + '.*' - elif c == '?': - res = res + '.' - elif c == '[': - j = i - if j < n and pat[j] == '!': - j = j+1 - if j < n and pat[j] == ']': - j = j+1 - while j < n and pat[j] != ']': - j = j+1 - if j >= n: - res = res + '\\[' - else: - stuff = pat[i:j].replace('\\','\\\\') - i = j+1 - if stuff[0] == '!': - stuff = '^' + stuff[1:] - elif stuff[0] == '^': - stuff = '\\' + stuff - res = '%s[%s]' % (res, stuff) - else: - res = res + re.escape(c) - return res + '\Z(?ms)'
--- a/test/lib/python2.7/genericpath.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -""" -Path operations common to more than one OS -Do not use directly. The OS specific modules import the appropriate -functions from this module themselves. -""" -import os -import stat - -__all__ = ['commonprefix', 'exists', 'getatime', 'getctime', 'getmtime', - 'getsize', 'isdir', 'isfile'] - - -try: - _unicode = unicode -except NameError: - # If Python is built without Unicode support, the unicode type - # will not exist. Fake one. - class _unicode(object): - pass - -# Does a path exist? -# This is false for dangling symbolic links on systems that support them. -def exists(path): - """Test whether a path exists. Returns False for broken symbolic links""" - try: - os.stat(path) - except os.error: - return False - return True - - -# This follows symbolic links, so both islink() and isdir() can be true -# for the same path on systems that support symlinks -def isfile(path): - """Test whether a path is a regular file""" - try: - st = os.stat(path) - except os.error: - return False - return stat.S_ISREG(st.st_mode) - - -# Is a path a directory? -# This follows symbolic links, so both islink() and isdir() -# can be true for the same path on systems that support symlinks -def isdir(s): - """Return true if the pathname refers to an existing directory.""" - try: - st = os.stat(s) - except os.error: - return False - return stat.S_ISDIR(st.st_mode) - - -def getsize(filename): - """Return the size of a file, reported by os.stat().""" - return os.stat(filename).st_size - - -def getmtime(filename): - """Return the last modification time of a file, reported by os.stat().""" - return os.stat(filename).st_mtime - - -def getatime(filename): - """Return the last access time of a file, reported by os.stat().""" - return os.stat(filename).st_atime - - -def getctime(filename): - """Return the metadata change time of a file, reported by os.stat().""" - return os.stat(filename).st_ctime - - -# Return the longest prefix of all list elements. -def commonprefix(m): - "Given a list of pathnames, returns the longest common leading component" - if not m: return '' - s1 = min(m) - s2 = max(m) - for i, c in enumerate(s1): - if c != s2[i]: - return s1[:i] - return s1 - -# Split a path in root and extension. -# The extension is everything starting at the last dot in the last -# pathname component; the root is everything before that. -# It is always true that root + ext == p. - -# Generic implementation of splitext, to be parametrized with -# the separators -def _splitext(p, sep, altsep, extsep): - """Split the extension from a pathname. - - Extension is everything from the last dot to the end, ignoring - leading dots. Returns "(root, ext)"; ext may be empty.""" - - sepIndex = p.rfind(sep) - if altsep: - altsepIndex = p.rfind(altsep) - sepIndex = max(sepIndex, altsepIndex) - - dotIndex = p.rfind(extsep) - if dotIndex > sepIndex: - # skip all leading dots - filenameIndex = sepIndex + 1 - while filenameIndex < dotIndex: - if p[filenameIndex] != extsep: - return p[:dotIndex], p[dotIndex:] - filenameIndex += 1 - - return p, ''
--- a/test/lib/python2.7/lib-dynload/Python-2.7.egg-info Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -Metadata-Version: 1.1 -Name: Python -Version: 2.7.12 -Summary: A high-level object-oriented programming language -Home-page: http://www.python.org/2.7 -Author: Guido van Rossum and the Python community -Author-email: python-dev@python.org -License: PSF license -Description: Python is an interpreted, interactive, object-oriented programming - language. It is often compared to Tcl, Perl, Scheme or Java. - - Python combines remarkable power with very clear syntax. It has - modules, classes, exceptions, very high level dynamic data types, and - dynamic typing. There are interfaces to many system calls and - libraries, as well as to various windowing systems (X11, Motif, Tk, - Mac, MFC). New built-in modules are easily written in C or C++. Python - is also usable as an extension language for applications that need a - programmable interface. - - The Python implementation is portable: it runs on many brands of UNIX, - on Windows, DOS, OS/2, Mac, Amiga... If your favorite system isn't - listed here, it may still be supported, if there's a C compiler for - it. Ask around on comp.lang.python -- or just try compiling Python - yourself. -Platform: Many -Classifier: Development Status :: 6 - Mature -Classifier: License :: OSI Approved :: Python Software Foundation License -Classifier: Natural Language :: English -Classifier: Programming Language :: C -Classifier: Programming Language :: Python -Classifier: Topic :: Software Development
--- a/test/lib/python2.7/linecache.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,139 +0,0 @@ -"""Cache lines from files. - -This is intended to read lines from modules imported -- hence if a filename -is not found, it will look down the module search path for a file by -that name. -""" - -import sys -import os - -__all__ = ["getline", "clearcache", "checkcache"] - -def getline(filename, lineno, module_globals=None): - lines = getlines(filename, module_globals) - if 1 <= lineno <= len(lines): - return lines[lineno-1] - else: - return '' - - -# The cache - -cache = {} # The cache - - -def clearcache(): - """Clear the cache entirely.""" - - global cache - cache = {} - - -def getlines(filename, module_globals=None): - """Get the lines for a file from the cache. - Update the cache if it doesn't contain an entry for this file already.""" - - if filename in cache: - return cache[filename][2] - - try: - return updatecache(filename, module_globals) - except MemoryError: - clearcache() - return [] - - -def checkcache(filename=None): - """Discard cache entries that are out of date. - (This is not checked upon each call!)""" - - if filename is None: - filenames = cache.keys() - else: - if filename in cache: - filenames = [filename] - else: - return - - for filename in filenames: - size, mtime, lines, fullname = cache[filename] - if mtime is None: - continue # no-op for files loaded via a __loader__ - try: - stat = os.stat(fullname) - except os.error: - del cache[filename] - continue - if size != stat.st_size or mtime != stat.st_mtime: - del cache[filename] - - -def updatecache(filename, module_globals=None): - """Update a cache entry and return its list of lines. - If something's wrong, print a message, discard the cache entry, - and return an empty list.""" - - if filename in cache: - del cache[filename] - if not filename or (filename.startswith('<') and filename.endswith('>')): - return [] - - fullname = filename - try: - stat = os.stat(fullname) - except OSError: - basename = filename - - # Try for a __loader__, if available - if module_globals and '__loader__' in module_globals: - name = module_globals.get('__name__') - loader = module_globals['__loader__'] - get_source = getattr(loader, 'get_source', None) - - if name and get_source: - try: - data = get_source(name) - except (ImportError, IOError): - pass - else: - if data is None: - # No luck, the PEP302 loader cannot find the source - # for this module. - return [] - cache[filename] = ( - len(data), None, - [line+'\n' for line in data.splitlines()], fullname - ) - return cache[filename][2] - - # Try looking through the module search path, which is only useful - # when handling a relative filename. - if os.path.isabs(filename): - return [] - - for dirname in sys.path: - # When using imputil, sys.path may contain things other than - # strings; ignore them when it happens. - try: - fullname = os.path.join(dirname, basename) - except (TypeError, AttributeError): - # Not sufficiently string-like to do anything useful with. - continue - try: - stat = os.stat(fullname) - break - except os.error: - pass - else: - return [] - try: - with open(fullname, 'rU') as fp: - lines = fp.readlines() - except IOError: - return [] - if lines and not lines[-1].endswith('\n'): - lines[-1] += '\n' - size, mtime = stat.st_size, stat.st_mtime - cache[filename] = size, mtime, lines, fullname - return lines
--- a/test/lib/python2.7/locale.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2064 +0,0 @@ -"""Locale support module. - -The module provides low-level access to the C lib's locale APIs and adds high -level number formatting APIs as well as a locale aliasing engine to complement -these. - -The aliasing engine includes support for many commonly used locale names and -maps them to values suitable for passing to the C lib's setlocale() function. It -also includes default encodings for all supported locale names. -""" - -import sys -import encodings -import encodings.aliases -import re -import operator -import functools - -# keep a copy of the builtin str type, because 'str' name is overridden -# in globals by a function below -_str = str - -try: - _unicode = unicode -except NameError: - # If Python is built without Unicode support, the unicode type - # will not exist. Fake one. - class _unicode(object): - pass - -# Try importing the _locale module. -# -# If this fails, fall back on a basic 'C' locale emulation. - -# Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before -# trying the import. So __all__ is also fiddled at the end of the file. -__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error", - "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm", - "str", "atof", "atoi", "format", "format_string", "currency", - "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", - "LC_NUMERIC", "LC_ALL", "CHAR_MAX"] - -try: - - from _locale import * - -except ImportError: - - # Locale emulation - - CHAR_MAX = 127 - LC_ALL = 6 - LC_COLLATE = 3 - LC_CTYPE = 0 - LC_MESSAGES = 5 - LC_MONETARY = 4 - LC_NUMERIC = 1 - LC_TIME = 2 - Error = ValueError - - def localeconv(): - """ localeconv() -> dict. - Returns numeric and monetary locale-specific parameters. - """ - # 'C' locale default values - return {'grouping': [127], - 'currency_symbol': '', - 'n_sign_posn': 127, - 'p_cs_precedes': 127, - 'n_cs_precedes': 127, - 'mon_grouping': [], - 'n_sep_by_space': 127, - 'decimal_point': '.', - 'negative_sign': '', - 'positive_sign': '', - 'p_sep_by_space': 127, - 'int_curr_symbol': '', - 'p_sign_posn': 127, - 'thousands_sep': '', - 'mon_thousands_sep': '', - 'frac_digits': 127, - 'mon_decimal_point': '', - 'int_frac_digits': 127} - - def setlocale(category, value=None): - """ setlocale(integer,string=None) -> string. - Activates/queries locale processing. - """ - if value not in (None, '', 'C'): - raise Error, '_locale emulation only supports "C" locale' - return 'C' - - def strcoll(a,b): - """ strcoll(string,string) -> int. - Compares two strings according to the locale. - """ - return cmp(a,b) - - def strxfrm(s): - """ strxfrm(string) -> string. - Returns a string that behaves for cmp locale-aware. - """ - return s - - -_localeconv = localeconv - -# With this dict, you can override some items of localeconv's return value. -# This is useful for testing purposes. -_override_localeconv = {} - -@functools.wraps(_localeconv) -def localeconv(): - d = _localeconv() - if _override_localeconv: - d.update(_override_localeconv) - return d - - -### Number formatting APIs - -# Author: Martin von Loewis -# improved by Georg Brandl - -# Iterate over grouping intervals -def _grouping_intervals(grouping): - last_interval = None - for interval in grouping: - # if grouping is -1, we are done - if interval == CHAR_MAX: - return - # 0: re-use last group ad infinitum - if interval == 0: - if last_interval is None: - raise ValueError("invalid grouping") - while True: - yield last_interval - yield interval - last_interval = interval - -#perform the grouping from right to left -def _group(s, monetary=False): - conv = localeconv() - thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep'] - grouping = conv[monetary and 'mon_grouping' or 'grouping'] - if not grouping: - return (s, 0) - if s[-1] == ' ': - stripped = s.rstrip() - right_spaces = s[len(stripped):] - s = stripped - else: - right_spaces = '' - left_spaces = '' - groups = [] - for interval in _grouping_intervals(grouping): - if not s or s[-1] not in "0123456789": - # only non-digit characters remain (sign, spaces) - left_spaces = s - s = '' - break - groups.append(s[-interval:]) - s = s[:-interval] - if s: - groups.append(s) - groups.reverse() - return ( - left_spaces + thousands_sep.join(groups) + right_spaces, - len(thousands_sep) * (len(groups) - 1) - ) - -# Strip a given amount of excess padding from the given string -def _strip_padding(s, amount): - lpos = 0 - while amount and s[lpos] == ' ': - lpos += 1 - amount -= 1 - rpos = len(s) - 1 - while amount and s[rpos] == ' ': - rpos -= 1 - amount -= 1 - return s[lpos:rpos+1] - -_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?' - r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]') - -def format(percent, value, grouping=False, monetary=False, *additional): - """Returns the locale-aware substitution of a %? specifier - (percent). - - additional is for format strings which contain one or more - '*' modifiers.""" - # this is only for one-percent-specifier strings and this should be checked - match = _percent_re.match(percent) - if not match or len(match.group())!= len(percent): - raise ValueError(("format() must be given exactly one %%char " - "format specifier, %s not valid") % repr(percent)) - return _format(percent, value, grouping, monetary, *additional) - -def _format(percent, value, grouping=False, monetary=False, *additional): - if additional: - formatted = percent % ((value,) + additional) - else: - formatted = percent % value - # floats and decimal ints need special action! - if percent[-1] in 'eEfFgG': - seps = 0 - parts = formatted.split('.') - if grouping: - parts[0], seps = _group(parts[0], monetary=monetary) - decimal_point = localeconv()[monetary and 'mon_decimal_point' - or 'decimal_point'] - formatted = decimal_point.join(parts) - if seps: - formatted = _strip_padding(formatted, seps) - elif percent[-1] in 'diu': - seps = 0 - if grouping: - formatted, seps = _group(formatted, monetary=monetary) - if seps: - formatted = _strip_padding(formatted, seps) - return formatted - -def format_string(f, val, grouping=False): - """Formats a string in the same way that the % formatting would use, - but takes the current locale into account. - Grouping is applied if the third parameter is true.""" - percents = list(_percent_re.finditer(f)) - new_f = _percent_re.sub('%s', f) - - if operator.isMappingType(val): - new_val = [] - for perc in percents: - if perc.group()[-1]=='%': - new_val.append('%') - else: - new_val.append(format(perc.group(), val, grouping)) - else: - if not isinstance(val, tuple): - val = (val,) - new_val = [] - i = 0 - for perc in percents: - if perc.group()[-1]=='%': - new_val.append('%') - else: - starcount = perc.group('modifiers').count('*') - new_val.append(_format(perc.group(), - val[i], - grouping, - False, - *val[i+1:i+1+starcount])) - i += (1 + starcount) - val = tuple(new_val) - - return new_f % val - -def currency(val, symbol=True, grouping=False, international=False): - """Formats val according to the currency settings - in the current locale.""" - conv = localeconv() - - # check for illegal values - digits = conv[international and 'int_frac_digits' or 'frac_digits'] - if digits == 127: - raise ValueError("Currency formatting is not possible using " - "the 'C' locale.") - - s = format('%%.%if' % digits, abs(val), grouping, monetary=True) - # '<' and '>' are markers if the sign must be inserted between symbol and value - s = '<' + s + '>' - - if symbol: - smb = conv[international and 'int_curr_symbol' or 'currency_symbol'] - precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes'] - separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space'] - - if precedes: - s = smb + (separated and ' ' or '') + s - else: - s = s + (separated and ' ' or '') + smb - - sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn'] - sign = conv[val<0 and 'negative_sign' or 'positive_sign'] - - if sign_pos == 0: - s = '(' + s + ')' - elif sign_pos == 1: - s = sign + s - elif sign_pos == 2: - s = s + sign - elif sign_pos == 3: - s = s.replace('<', sign) - elif sign_pos == 4: - s = s.replace('>', sign) - else: - # the default if nothing specified; - # this should be the most fitting sign position - s = sign + s - - return s.replace('<', '').replace('>', '') - -def str(val): - """Convert float to string, taking the locale into account.""" - return format("%.12g", val) - -def atof(string, func=float): - "Parses a string as a float according to the locale settings." - #First, get rid of the grouping - ts = localeconv()['thousands_sep'] - if ts: - string = string.replace(ts, '') - #next, replace the decimal point with a dot - dd = localeconv()['decimal_point'] - if dd: - string = string.replace(dd, '.') - #finally, parse the string - return func(string) - -def atoi(str): - "Converts a string to an integer according to the locale settings." - return atof(str, int) - -def _test(): - setlocale(LC_ALL, "") - #do grouping - s1 = format("%d", 123456789,1) - print s1, "is", atoi(s1) - #standard formatting - s1 = str(3.14) - print s1, "is", atof(s1) - -### Locale name aliasing engine - -# Author: Marc-Andre Lemburg, mal@lemburg.com -# Various tweaks by Fredrik Lundh <fredrik@pythonware.com> - -# store away the low-level version of setlocale (it's -# overridden below) -_setlocale = setlocale - -# Avoid relying on the locale-dependent .lower() method -# (see issue #1813). -_ascii_lower_map = ''.join( - chr(x + 32 if x >= ord('A') and x <= ord('Z') else x) - for x in range(256) -) - -def _replace_encoding(code, encoding): - if '.' in code: - langname = code[:code.index('.')] - else: - langname = code - # Convert the encoding to a C lib compatible encoding string - norm_encoding = encodings.normalize_encoding(encoding) - #print('norm encoding: %r' % norm_encoding) - norm_encoding = encodings.aliases.aliases.get(norm_encoding, - norm_encoding) - #print('aliased encoding: %r' % norm_encoding) - encoding = locale_encoding_alias.get(norm_encoding, - norm_encoding) - #print('found encoding %r' % encoding) - return langname + '.' + encoding - -def normalize(localename): - - """ Returns a normalized locale code for the given locale - name. - - The returned locale code is formatted for use with - setlocale(). - - If normalization fails, the original name is returned - unchanged. - - If the given encoding is not known, the function defaults to - the default encoding for the locale code just like setlocale() - does. - - """ - # Normalize the locale name and extract the encoding and modifier - if isinstance(localename, _unicode): - localename = localename.encode('ascii') - code = localename.translate(_ascii_lower_map) - if ':' in code: - # ':' is sometimes used as encoding delimiter. - code = code.replace(':', '.') - if '@' in code: - code, modifier = code.split('@', 1) - else: - modifier = '' - if '.' in code: - langname, encoding = code.split('.')[:2] - else: - langname = code - encoding = '' - - # First lookup: fullname (possibly with encoding and modifier) - lang_enc = langname - if encoding: - norm_encoding = encoding.replace('-', '') - norm_encoding = norm_encoding.replace('_', '') - lang_enc += '.' + norm_encoding - lookup_name = lang_enc - if modifier: - lookup_name += '@' + modifier - code = locale_alias.get(lookup_name, None) - if code is not None: - return code - #print('first lookup failed') - - if modifier: - # Second try: fullname without modifier (possibly with encoding) - code = locale_alias.get(lang_enc, None) - if code is not None: - #print('lookup without modifier succeeded') - if '@' not in code: - return code + '@' + modifier - if code.split('@', 1)[1].translate(_ascii_lower_map) == modifier: - return code - #print('second lookup failed') - - if encoding: - # Third try: langname (without encoding, possibly with modifier) - lookup_name = langname - if modifier: - lookup_name += '@' + modifier - code = locale_alias.get(lookup_name, None) - if code is not None: - #print('lookup without encoding succeeded') - if '@' not in code: - return _replace_encoding(code, encoding) - code, modifier = code.split('@', 1) - return _replace_encoding(code, encoding) + '@' + modifier - - if modifier: - # Fourth try: langname (without encoding and modifier) - code = locale_alias.get(langname, None) - if code is not None: - #print('lookup without modifier and encoding succeeded') - if '@' not in code: - return _replace_encoding(code, encoding) + '@' + modifier - code, defmod = code.split('@', 1) - if defmod.translate(_ascii_lower_map) == modifier: - return _replace_encoding(code, encoding) + '@' + defmod - - return localename - -def _parse_localename(localename): - - """ Parses the locale code for localename and returns the - result as tuple (language code, encoding). - - The localename is normalized and passed through the locale - alias engine. A ValueError is raised in case the locale name - cannot be parsed. - - The language code corresponds to RFC 1766. code and encoding - can be None in case the values cannot be determined or are - unknown to this implementation. - - """ - code = normalize(localename) - if '@' in code: - # Deal with locale modifiers - code, modifier = code.split('@', 1) - if modifier == 'euro' and '.' not in code: - # Assume Latin-9 for @euro locales. This is bogus, - # since some systems may use other encodings for these - # locales. Also, we ignore other modifiers. - return code, 'iso-8859-15' - - if '.' in code: - return tuple(code.split('.')[:2]) - elif code == 'C': - return None, None - raise ValueError, 'unknown locale: %s' % localename - -def _build_localename(localetuple): - - """ Builds a locale code from the given tuple (language code, - encoding). - - No aliasing or normalizing takes place. - - """ - language, encoding = localetuple - if language is None: - language = 'C' - if encoding is None: - return language - else: - return language + '.' + encoding - -def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')): - - """ Tries to determine the default locale settings and returns - them as tuple (language code, encoding). - - According to POSIX, a program which has not called - setlocale(LC_ALL, "") runs using the portable 'C' locale. - Calling setlocale(LC_ALL, "") lets it use the default locale as - defined by the LANG variable. Since we don't want to interfere - with the current locale setting we thus emulate the behavior - in the way described above. - - To maintain compatibility with other platforms, not only the - LANG variable is tested, but a list of variables given as - envvars parameter. The first found to be defined will be - used. envvars defaults to the search path used in GNU gettext; - it must always contain the variable name 'LANG'. - - Except for the code 'C', the language code corresponds to RFC - 1766. code and encoding can be None in case the values cannot - be determined. - - """ - - try: - # check if it's supported by the _locale module - import _locale - code, encoding = _locale._getdefaultlocale() - except (ImportError, AttributeError): - pass - else: - # make sure the code/encoding values are valid - if sys.platform == "win32" and code and code[:2] == "0x": - # map windows language identifier to language name - code = windows_locale.get(int(code, 0)) - # ...add other platform-specific processing here, if - # necessary... - return code, encoding - - # fall back on POSIX behaviour - import os - lookup = os.environ.get - for variable in envvars: - localename = lookup(variable,None) - if localename: - if variable == 'LANGUAGE': - localename = localename.split(':')[0] - break - else: - localename = 'C' - return _parse_localename(localename) - - -def getlocale(category=LC_CTYPE): - - """ Returns the current setting for the given locale category as - tuple (language code, encoding). - - category may be one of the LC_* value except LC_ALL. It - defaults to LC_CTYPE. - - Except for the code 'C', the language code corresponds to RFC - 1766. code and encoding can be None in case the values cannot - be determined. - - """ - localename = _setlocale(category) - if category == LC_ALL and ';' in localename: - raise TypeError, 'category LC_ALL is not supported' - return _parse_localename(localename) - -def setlocale(category, locale=None): - - """ Set the locale for the given category. The locale can be - a string, an iterable of two strings (language code and encoding), - or None. - - Iterables are converted to strings using the locale aliasing - engine. Locale strings are passed directly to the C lib. - - category may be given as one of the LC_* values. - - """ - if locale and not isinstance(locale, (_str, _unicode)): - # convert to string - locale = normalize(_build_localename(locale)) - return _setlocale(category, locale) - -def resetlocale(category=LC_ALL): - - """ Sets the locale for category to the default setting. - - The default setting is determined by calling - getdefaultlocale(). category defaults to LC_ALL. - - """ - _setlocale(category, _build_localename(getdefaultlocale())) - -if sys.platform.startswith("win"): - # On Win32, this will return the ANSI code page - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using.""" - import _locale - return _locale._getdefaultlocale()[1] -else: - # On Unix, if CODESET is available, use that. - try: - CODESET - except NameError: - # Fall back to parsing environment variables :-( - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using, - by looking at environment variables.""" - return getdefaultlocale()[1] - else: - def getpreferredencoding(do_setlocale = True): - """Return the charset that the user is likely using, - according to the system configuration.""" - if do_setlocale: - oldloc = setlocale(LC_CTYPE) - try: - setlocale(LC_CTYPE, "") - except Error: - pass - result = nl_langinfo(CODESET) - setlocale(LC_CTYPE, oldloc) - return result - else: - return nl_langinfo(CODESET) - - -### Database -# -# The following data was extracted from the locale.alias file which -# comes with X11 and then hand edited removing the explicit encoding -# definitions and adding some more aliases. The file is usually -# available as /usr/lib/X11/locale/locale.alias. -# - -# -# The local_encoding_alias table maps lowercase encoding alias names -# to C locale encoding names (case-sensitive). Note that normalize() -# first looks up the encoding in the encodings.aliases dictionary and -# then applies this mapping to find the correct C lib name for the -# encoding. -# -locale_encoding_alias = { - - # Mappings for non-standard encoding names used in locale names - '437': 'C', - 'c': 'C', - 'en': 'ISO8859-1', - 'jis': 'JIS7', - 'jis7': 'JIS7', - 'ajec': 'eucJP', - - # Mappings from Python codec names to C lib encoding names - 'ascii': 'ISO8859-1', - 'latin_1': 'ISO8859-1', - 'iso8859_1': 'ISO8859-1', - 'iso8859_10': 'ISO8859-10', - 'iso8859_11': 'ISO8859-11', - 'iso8859_13': 'ISO8859-13', - 'iso8859_14': 'ISO8859-14', - 'iso8859_15': 'ISO8859-15', - 'iso8859_16': 'ISO8859-16', - 'iso8859_2': 'ISO8859-2', - 'iso8859_3': 'ISO8859-3', - 'iso8859_4': 'ISO8859-4', - 'iso8859_5': 'ISO8859-5', - 'iso8859_6': 'ISO8859-6', - 'iso8859_7': 'ISO8859-7', - 'iso8859_8': 'ISO8859-8', - 'iso8859_9': 'ISO8859-9', - 'iso2022_jp': 'JIS7', - 'shift_jis': 'SJIS', - 'tactis': 'TACTIS', - 'euc_jp': 'eucJP', - 'euc_kr': 'eucKR', - 'utf_8': 'UTF-8', - 'koi8_r': 'KOI8-R', - 'koi8_u': 'KOI8-U', - # XXX This list is still incomplete. If you know more - # mappings, please file a bug report. Thanks. -} - -# -# The locale_alias table maps lowercase alias names to C locale names -# (case-sensitive). Encodings are always separated from the locale -# name using a dot ('.'); they should only be given in case the -# language name is needed to interpret the given encoding alias -# correctly (CJK codes often have this need). -# -# Note that the normalize() function which uses this tables -# removes '_' and '-' characters from the encoding part of the -# locale name before doing the lookup. This saves a lot of -# space in the table. -# -# MAL 2004-12-10: -# Updated alias mapping to most recent locale.alias file -# from X.org distribution using makelocalealias.py. -# -# These are the differences compared to the old mapping (Python 2.4 -# and older): -# -# updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' -# updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' -# updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251' -# updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2' -# updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2' -# updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2' -# updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1' -# updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15' -# updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15' -# updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15' -# updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15' -# updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' -# updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' -# updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP' -# updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13' -# updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13' -# updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2' -# updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2' -# updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11' -# updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312' -# updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5' -# updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5' -# -# MAL 2008-05-30: -# Updated alias mapping to most recent locale.alias file -# from X.org distribution using makelocalealias.py. -# -# These are the differences compared to the old mapping (Python 2.5 -# and older): -# -# updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2' -# updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2' -# updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251' -# updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2' -# updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251' -# updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8' -# updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5' -# -# AP 2010-04-12: -# Updated alias mapping to most recent locale.alias file -# from X.org distribution using makelocalealias.py. -# -# These are the differences compared to the old mapping (Python 2.6.5 -# and older): -# -# updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8' -# updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8' -# updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' -# updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' -# updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' -# updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' -# updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' -# updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' -# updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin' -# updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin' -# updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin' -# updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8' -# updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8' -# -# SS 2013-12-20: -# Updated alias mapping to most recent locale.alias file -# from X.org distribution using makelocalealias.py. -# -# These are the differences compared to the old mapping (Python 2.7.6 -# and older): -# -# updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' -# updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' -# updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C' -# updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2' -# updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' -# updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8' -# updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8' -# updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' -# updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8' -# updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' -# updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin' -# -# SS 2014-10-01: -# Updated alias mapping with glibc 2.19 supported locales. - -locale_alias = { - 'a3': 'az_AZ.KOI8-C', - 'a3_az': 'az_AZ.KOI8-C', - 'a3_az.koi8c': 'az_AZ.KOI8-C', - 'a3_az.koic': 'az_AZ.KOI8-C', - 'aa_dj': 'aa_DJ.ISO8859-1', - 'aa_er': 'aa_ER.UTF-8', - 'aa_et': 'aa_ET.UTF-8', - 'af': 'af_ZA.ISO8859-1', - 'af_za': 'af_ZA.ISO8859-1', - 'af_za.iso88591': 'af_ZA.ISO8859-1', - 'am': 'am_ET.UTF-8', - 'am_et': 'am_ET.UTF-8', - 'american': 'en_US.ISO8859-1', - 'american.iso88591': 'en_US.ISO8859-1', - 'an_es': 'an_ES.ISO8859-15', - 'ar': 'ar_AA.ISO8859-6', - 'ar_aa': 'ar_AA.ISO8859-6', - 'ar_aa.iso88596': 'ar_AA.ISO8859-6', - 'ar_ae': 'ar_AE.ISO8859-6', - 'ar_ae.iso88596': 'ar_AE.ISO8859-6', - 'ar_bh': 'ar_BH.ISO8859-6', - 'ar_bh.iso88596': 'ar_BH.ISO8859-6', - 'ar_dz': 'ar_DZ.ISO8859-6', - 'ar_dz.iso88596': 'ar_DZ.ISO8859-6', - 'ar_eg': 'ar_EG.ISO8859-6', - 'ar_eg.iso88596': 'ar_EG.ISO8859-6', - 'ar_in': 'ar_IN.UTF-8', - 'ar_iq': 'ar_IQ.ISO8859-6', - 'ar_iq.iso88596': 'ar_IQ.ISO8859-6', - 'ar_jo': 'ar_JO.ISO8859-6', - 'ar_jo.iso88596': 'ar_JO.ISO8859-6', - 'ar_kw': 'ar_KW.ISO8859-6', - 'ar_kw.iso88596': 'ar_KW.ISO8859-6', - 'ar_lb': 'ar_LB.ISO8859-6', - 'ar_lb.iso88596': 'ar_LB.ISO8859-6', - 'ar_ly': 'ar_LY.ISO8859-6', - 'ar_ly.iso88596': 'ar_LY.ISO8859-6', - 'ar_ma': 'ar_MA.ISO8859-6', - 'ar_ma.iso88596': 'ar_MA.ISO8859-6', - 'ar_om': 'ar_OM.ISO8859-6', - 'ar_om.iso88596': 'ar_OM.ISO8859-6', - 'ar_qa': 'ar_QA.ISO8859-6', - 'ar_qa.iso88596': 'ar_QA.ISO8859-6', - 'ar_sa': 'ar_SA.ISO8859-6', - 'ar_sa.iso88596': 'ar_SA.ISO8859-6', - 'ar_sd': 'ar_SD.ISO8859-6', - 'ar_sd.iso88596': 'ar_SD.ISO8859-6', - 'ar_sy': 'ar_SY.ISO8859-6', - 'ar_sy.iso88596': 'ar_SY.ISO8859-6', - 'ar_tn': 'ar_TN.ISO8859-6', - 'ar_tn.iso88596': 'ar_TN.ISO8859-6', - 'ar_ye': 'ar_YE.ISO8859-6', - 'ar_ye.iso88596': 'ar_YE.ISO8859-6', - 'arabic': 'ar_AA.ISO8859-6', - 'arabic.iso88596': 'ar_AA.ISO8859-6', - 'as': 'as_IN.UTF-8', - 'as_in': 'as_IN.UTF-8', - 'ast_es': 'ast_ES.ISO8859-15', - 'ayc_pe': 'ayc_PE.UTF-8', - 'az': 'az_AZ.ISO8859-9E', - 'az_az': 'az_AZ.ISO8859-9E', - 'az_az.iso88599e': 'az_AZ.ISO8859-9E', - 'be': 'be_BY.CP1251', - 'be@latin': 'be_BY.UTF-8@latin', - 'be_bg.utf8': 'bg_BG.UTF-8', - 'be_by': 'be_BY.CP1251', - 'be_by.cp1251': 'be_BY.CP1251', - 'be_by.microsoftcp1251': 'be_BY.CP1251', - 'be_by.utf8@latin': 'be_BY.UTF-8@latin', - 'be_by@latin': 'be_BY.UTF-8@latin', - 'bem_zm': 'bem_ZM.UTF-8', - 'ber_dz': 'ber_DZ.UTF-8', - 'ber_ma': 'ber_MA.UTF-8', - 'bg': 'bg_BG.CP1251', - 'bg_bg': 'bg_BG.CP1251', - 'bg_bg.cp1251': 'bg_BG.CP1251', - 'bg_bg.iso88595': 'bg_BG.ISO8859-5', - 'bg_bg.koi8r': 'bg_BG.KOI8-R', - 'bg_bg.microsoftcp1251': 'bg_BG.CP1251', - 'bho_in': 'bho_IN.UTF-8', - 'bn_bd': 'bn_BD.UTF-8', - 'bn_in': 'bn_IN.UTF-8', - 'bo_cn': 'bo_CN.UTF-8', - 'bo_in': 'bo_IN.UTF-8', - 'bokmal': 'nb_NO.ISO8859-1', - 'bokm\xe5l': 'nb_NO.ISO8859-1', - 'br': 'br_FR.ISO8859-1', - 'br_fr': 'br_FR.ISO8859-1', - 'br_fr.iso88591': 'br_FR.ISO8859-1', - 'br_fr.iso885914': 'br_FR.ISO8859-14', - 'br_fr.iso885915': 'br_FR.ISO8859-15', - 'br_fr.iso885915@euro': 'br_FR.ISO8859-15', - 'br_fr.utf8@euro': 'br_FR.UTF-8', - 'br_fr@euro': 'br_FR.ISO8859-15', - 'brx_in': 'brx_IN.UTF-8', - 'bs': 'bs_BA.ISO8859-2', - 'bs_ba': 'bs_BA.ISO8859-2', - 'bs_ba.iso88592': 'bs_BA.ISO8859-2', - 'bulgarian': 'bg_BG.CP1251', - 'byn_er': 'byn_ER.UTF-8', - 'c': 'C', - 'c-french': 'fr_CA.ISO8859-1', - 'c-french.iso88591': 'fr_CA.ISO8859-1', - 'c.ascii': 'C', - 'c.en': 'C', - 'c.iso88591': 'en_US.ISO8859-1', - 'c.utf8': 'en_US.UTF-8', - 'c_c': 'C', - 'c_c.c': 'C', - 'ca': 'ca_ES.ISO8859-1', - 'ca_ad': 'ca_AD.ISO8859-1', - 'ca_ad.iso88591': 'ca_AD.ISO8859-1', - 'ca_ad.iso885915': 'ca_AD.ISO8859-15', - 'ca_ad.iso885915@euro': 'ca_AD.ISO8859-15', - 'ca_ad.utf8@euro': 'ca_AD.UTF-8', - 'ca_ad@euro': 'ca_AD.ISO8859-15', - 'ca_es': 'ca_ES.ISO8859-1', - 'ca_es.iso88591': 'ca_ES.ISO8859-1', - 'ca_es.iso885915': 'ca_ES.ISO8859-15', - 'ca_es.iso885915@euro': 'ca_ES.ISO8859-15', - 'ca_es.utf8@euro': 'ca_ES.UTF-8', - 'ca_es@valencia': 'ca_ES.ISO8859-15@valencia', - 'ca_es@euro': 'ca_ES.ISO8859-15', - 'ca_fr': 'ca_FR.ISO8859-1', - 'ca_fr.iso88591': 'ca_FR.ISO8859-1', - 'ca_fr.iso885915': 'ca_FR.ISO8859-15', - 'ca_fr.iso885915@euro': 'ca_FR.ISO8859-15', - 'ca_fr.utf8@euro': 'ca_FR.UTF-8', - 'ca_fr@euro': 'ca_FR.ISO8859-15', - 'ca_it': 'ca_IT.ISO8859-1', - 'ca_it.iso88591': 'ca_IT.ISO8859-1', - 'ca_it.iso885915': 'ca_IT.ISO8859-15', - 'ca_it.iso885915@euro': 'ca_IT.ISO8859-15', - 'ca_it.utf8@euro': 'ca_IT.UTF-8', - 'ca_it@euro': 'ca_IT.ISO8859-15', - 'catalan': 'ca_ES.ISO8859-1', - 'cextend': 'en_US.ISO8859-1', - 'cextend.en': 'en_US.ISO8859-1', - 'chinese-s': 'zh_CN.eucCN', - 'chinese-t': 'zh_TW.eucTW', - 'crh_ua': 'crh_UA.UTF-8', - 'croatian': 'hr_HR.ISO8859-2', - 'cs': 'cs_CZ.ISO8859-2', - 'cs_cs': 'cs_CZ.ISO8859-2', - 'cs_cs.iso88592': 'cs_CZ.ISO8859-2', - 'cs_cz': 'cs_CZ.ISO8859-2', - 'cs_cz.iso88592': 'cs_CZ.ISO8859-2', - 'csb_pl': 'csb_PL.UTF-8', - 'cv_ru': 'cv_RU.UTF-8', - 'cy': 'cy_GB.ISO8859-1', - 'cy_gb': 'cy_GB.ISO8859-1', - 'cy_gb.iso88591': 'cy_GB.ISO8859-1', - 'cy_gb.iso885914': 'cy_GB.ISO8859-14', - 'cy_gb.iso885915': 'cy_GB.ISO8859-15', - 'cy_gb@euro': 'cy_GB.ISO8859-15', - 'cz': 'cs_CZ.ISO8859-2', - 'cz_cz': 'cs_CZ.ISO8859-2', - 'czech': 'cs_CZ.ISO8859-2', - 'da': 'da_DK.ISO8859-1', - 'da.iso885915': 'da_DK.ISO8859-15', - 'da_dk': 'da_DK.ISO8859-1', - 'da_dk.88591': 'da_DK.ISO8859-1', - 'da_dk.885915': 'da_DK.ISO8859-15', - 'da_dk.iso88591': 'da_DK.ISO8859-1', - 'da_dk.iso885915': 'da_DK.ISO8859-15', - 'da_dk@euro': 'da_DK.ISO8859-15', - 'danish': 'da_DK.ISO8859-1', - 'danish.iso88591': 'da_DK.ISO8859-1', - 'dansk': 'da_DK.ISO8859-1', - 'de': 'de_DE.ISO8859-1', - 'de.iso885915': 'de_DE.ISO8859-15', - 'de_at': 'de_AT.ISO8859-1', - 'de_at.iso88591': 'de_AT.ISO8859-1', - 'de_at.iso885915': 'de_AT.ISO8859-15', - 'de_at.iso885915@euro': 'de_AT.ISO8859-15', - 'de_at.utf8@euro': 'de_AT.UTF-8', - 'de_at@euro': 'de_AT.ISO8859-15', - 'de_be': 'de_BE.ISO8859-1', - 'de_be.iso88591': 'de_BE.ISO8859-1', - 'de_be.iso885915': 'de_BE.ISO8859-15', - 'de_be.iso885915@euro': 'de_BE.ISO8859-15', - 'de_be.utf8@euro': 'de_BE.UTF-8', - 'de_be@euro': 'de_BE.ISO8859-15', - 'de_ch': 'de_CH.ISO8859-1', - 'de_ch.iso88591': 'de_CH.ISO8859-1', - 'de_ch.iso885915': 'de_CH.ISO8859-15', - 'de_ch@euro': 'de_CH.ISO8859-15', - 'de_de': 'de_DE.ISO8859-1', - 'de_de.88591': 'de_DE.ISO8859-1', - 'de_de.885915': 'de_DE.ISO8859-15', - 'de_de.885915@euro': 'de_DE.ISO8859-15', - 'de_de.iso88591': 'de_DE.ISO8859-1', - 'de_de.iso885915': 'de_DE.ISO8859-15', - 'de_de.iso885915@euro': 'de_DE.ISO8859-15', - 'de_de.utf8@euro': 'de_DE.UTF-8', - 'de_de@euro': 'de_DE.ISO8859-15', - 'de_li.utf8': 'de_LI.UTF-8', - 'de_lu': 'de_LU.ISO8859-1', - 'de_lu.iso88591': 'de_LU.ISO8859-1', - 'de_lu.iso885915': 'de_LU.ISO8859-15', - 'de_lu.iso885915@euro': 'de_LU.ISO8859-15', - 'de_lu.utf8@euro': 'de_LU.UTF-8', - 'de_lu@euro': 'de_LU.ISO8859-15', - 'deutsch': 'de_DE.ISO8859-1', - 'doi_in': 'doi_IN.UTF-8', - 'dutch': 'nl_NL.ISO8859-1', - 'dutch.iso88591': 'nl_BE.ISO8859-1', - 'dv_mv': 'dv_MV.UTF-8', - 'dz_bt': 'dz_BT.UTF-8', - 'ee': 'ee_EE.ISO8859-4', - 'ee_ee': 'ee_EE.ISO8859-4', - 'ee_ee.iso88594': 'ee_EE.ISO8859-4', - 'eesti': 'et_EE.ISO8859-1', - 'el': 'el_GR.ISO8859-7', - 'el_cy': 'el_CY.ISO8859-7', - 'el_gr': 'el_GR.ISO8859-7', - 'el_gr.iso88597': 'el_GR.ISO8859-7', - 'el_gr@euro': 'el_GR.ISO8859-15', - 'en': 'en_US.ISO8859-1', - 'en.iso88591': 'en_US.ISO8859-1', - 'en_ag': 'en_AG.UTF-8', - 'en_au': 'en_AU.ISO8859-1', - 'en_au.iso88591': 'en_AU.ISO8859-1', - 'en_be': 'en_BE.ISO8859-1', - 'en_be@euro': 'en_BE.ISO8859-15', - 'en_bw': 'en_BW.ISO8859-1', - 'en_bw.iso88591': 'en_BW.ISO8859-1', - 'en_ca': 'en_CA.ISO8859-1', - 'en_ca.iso88591': 'en_CA.ISO8859-1', - 'en_dk': 'en_DK.ISO8859-1', - 'en_dl.utf8': 'en_DL.UTF-8', - 'en_gb': 'en_GB.ISO8859-1', - 'en_gb.88591': 'en_GB.ISO8859-1', - 'en_gb.iso88591': 'en_GB.ISO8859-1', - 'en_gb.iso885915': 'en_GB.ISO8859-15', - 'en_gb@euro': 'en_GB.ISO8859-15', - 'en_hk': 'en_HK.ISO8859-1', - 'en_hk.iso88591': 'en_HK.ISO8859-1', - 'en_ie': 'en_IE.ISO8859-1', - 'en_ie.iso88591': 'en_IE.ISO8859-1', - 'en_ie.iso885915': 'en_IE.ISO8859-15', - 'en_ie.iso885915@euro': 'en_IE.ISO8859-15', - 'en_ie.utf8@euro': 'en_IE.UTF-8', - 'en_ie@euro': 'en_IE.ISO8859-15', - 'en_in': 'en_IN.ISO8859-1', - 'en_ng': 'en_NG.UTF-8', - 'en_nz': 'en_NZ.ISO8859-1', - 'en_nz.iso88591': 'en_NZ.ISO8859-1', - 'en_ph': 'en_PH.ISO8859-1', - 'en_ph.iso88591': 'en_PH.ISO8859-1', - 'en_sg': 'en_SG.ISO8859-1', - 'en_sg.iso88591': 'en_SG.ISO8859-1', - 'en_uk': 'en_GB.ISO8859-1', - 'en_us': 'en_US.ISO8859-1', - 'en_us.88591': 'en_US.ISO8859-1', - 'en_us.885915': 'en_US.ISO8859-15', - 'en_us.iso88591': 'en_US.ISO8859-1', - 'en_us.iso885915': 'en_US.ISO8859-15', - 'en_us.iso885915@euro': 'en_US.ISO8859-15', - 'en_us@euro': 'en_US.ISO8859-15', - 'en_us@euro@euro': 'en_US.ISO8859-15', - 'en_za': 'en_ZA.ISO8859-1', - 'en_za.88591': 'en_ZA.ISO8859-1', - 'en_za.iso88591': 'en_ZA.ISO8859-1', - 'en_za.iso885915': 'en_ZA.ISO8859-15', - 'en_za@euro': 'en_ZA.ISO8859-15', - 'en_zm': 'en_ZM.UTF-8', - 'en_zw': 'en_ZW.ISO8859-1', - 'en_zw.iso88591': 'en_ZW.ISO8859-1', - 'en_zw.utf8': 'en_ZS.UTF-8', - 'eng_gb': 'en_GB.ISO8859-1', - 'eng_gb.8859': 'en_GB.ISO8859-1', - 'english': 'en_EN.ISO8859-1', - 'english.iso88591': 'en_EN.ISO8859-1', - 'english_uk': 'en_GB.ISO8859-1', - 'english_uk.8859': 'en_GB.ISO8859-1', - 'english_united-states': 'en_US.ISO8859-1', - 'english_united-states.437': 'C', - 'english_us': 'en_US.ISO8859-1', - 'english_us.8859': 'en_US.ISO8859-1', - 'english_us.ascii': 'en_US.ISO8859-1', - 'eo': 'eo_XX.ISO8859-3', - 'eo.utf8': 'eo.UTF-8', - 'eo_eo': 'eo_EO.ISO8859-3', - 'eo_eo.iso88593': 'eo_EO.ISO8859-3', - 'eo_us.utf8': 'eo_US.UTF-8', - 'eo_xx': 'eo_XX.ISO8859-3', - 'eo_xx.iso88593': 'eo_XX.ISO8859-3', - 'es': 'es_ES.ISO8859-1', - 'es_ar': 'es_AR.ISO8859-1', - 'es_ar.iso88591': 'es_AR.ISO8859-1', - 'es_bo': 'es_BO.ISO8859-1', - 'es_bo.iso88591': 'es_BO.ISO8859-1', - 'es_cl': 'es_CL.ISO8859-1', - 'es_cl.iso88591': 'es_CL.ISO8859-1', - 'es_co': 'es_CO.ISO8859-1', - 'es_co.iso88591': 'es_CO.ISO8859-1', - 'es_cr': 'es_CR.ISO8859-1', - 'es_cr.iso88591': 'es_CR.ISO8859-1', - 'es_cu': 'es_CU.UTF-8', - 'es_do': 'es_DO.ISO8859-1', - 'es_do.iso88591': 'es_DO.ISO8859-1', - 'es_ec': 'es_EC.ISO8859-1', - 'es_ec.iso88591': 'es_EC.ISO8859-1', - 'es_es': 'es_ES.ISO8859-1', - 'es_es.88591': 'es_ES.ISO8859-1', - 'es_es.iso88591': 'es_ES.ISO8859-1', - 'es_es.iso885915': 'es_ES.ISO8859-15', - 'es_es.iso885915@euro': 'es_ES.ISO8859-15', - 'es_es.utf8@euro': 'es_ES.UTF-8', - 'es_es@euro': 'es_ES.ISO8859-15', - 'es_gt': 'es_GT.ISO8859-1', - 'es_gt.iso88591': 'es_GT.ISO8859-1', - 'es_hn': 'es_HN.ISO8859-1', - 'es_hn.iso88591': 'es_HN.ISO8859-1', - 'es_mx': 'es_MX.ISO8859-1', - 'es_mx.iso88591': 'es_MX.ISO8859-1', - 'es_ni': 'es_NI.ISO8859-1', - 'es_ni.iso88591': 'es_NI.ISO8859-1', - 'es_pa': 'es_PA.ISO8859-1', - 'es_pa.iso88591': 'es_PA.ISO8859-1', - 'es_pa.iso885915': 'es_PA.ISO8859-15', - 'es_pa@euro': 'es_PA.ISO8859-15', - 'es_pe': 'es_PE.ISO8859-1', - 'es_pe.iso88591': 'es_PE.ISO8859-1', - 'es_pe.iso885915': 'es_PE.ISO8859-15', - 'es_pe@euro': 'es_PE.ISO8859-15', - 'es_pr': 'es_PR.ISO8859-1', - 'es_pr.iso88591': 'es_PR.ISO8859-1', - 'es_py': 'es_PY.ISO8859-1', - 'es_py.iso88591': 'es_PY.ISO8859-1', - 'es_py.iso885915': 'es_PY.ISO8859-15', - 'es_py@euro': 'es_PY.ISO8859-15', - 'es_sv': 'es_SV.ISO8859-1', - 'es_sv.iso88591': 'es_SV.ISO8859-1', - 'es_sv.iso885915': 'es_SV.ISO8859-15', - 'es_sv@euro': 'es_SV.ISO8859-15', - 'es_us': 'es_US.ISO8859-1', - 'es_us.iso88591': 'es_US.ISO8859-1', - 'es_uy': 'es_UY.ISO8859-1', - 'es_uy.iso88591': 'es_UY.ISO8859-1', - 'es_uy.iso885915': 'es_UY.ISO8859-15', - 'es_uy@euro': 'es_UY.ISO8859-15', - 'es_ve': 'es_VE.ISO8859-1', - 'es_ve.iso88591': 'es_VE.ISO8859-1', - 'es_ve.iso885915': 'es_VE.ISO8859-15', - 'es_ve@euro': 'es_VE.ISO8859-15', - 'estonian': 'et_EE.ISO8859-1', - 'et': 'et_EE.ISO8859-15', - 'et_ee': 'et_EE.ISO8859-15', - 'et_ee.iso88591': 'et_EE.ISO8859-1', - 'et_ee.iso885913': 'et_EE.ISO8859-13', - 'et_ee.iso885915': 'et_EE.ISO8859-15', - 'et_ee.iso88594': 'et_EE.ISO8859-4', - 'et_ee@euro': 'et_EE.ISO8859-15', - 'eu': 'eu_ES.ISO8859-1', - 'eu_es': 'eu_ES.ISO8859-1', - 'eu_es.iso88591': 'eu_ES.ISO8859-1', - 'eu_es.iso885915': 'eu_ES.ISO8859-15', - 'eu_es.iso885915@euro': 'eu_ES.ISO8859-15', - 'eu_es.utf8@euro': 'eu_ES.UTF-8', - 'eu_es@euro': 'eu_ES.ISO8859-15', - 'eu_fr': 'eu_FR.ISO8859-1', - 'fa': 'fa_IR.UTF-8', - 'fa_ir': 'fa_IR.UTF-8', - 'fa_ir.isiri3342': 'fa_IR.ISIRI-3342', - 'ff_sn': 'ff_SN.UTF-8', - 'fi': 'fi_FI.ISO8859-15', - 'fi.iso885915': 'fi_FI.ISO8859-15', - 'fi_fi': 'fi_FI.ISO8859-15', - 'fi_fi.88591': 'fi_FI.ISO8859-1', - 'fi_fi.iso88591': 'fi_FI.ISO8859-1', - 'fi_fi.iso885915': 'fi_FI.ISO8859-15', - 'fi_fi.iso885915@euro': 'fi_FI.ISO8859-15', - 'fi_fi.utf8@euro': 'fi_FI.UTF-8', - 'fi_fi@euro': 'fi_FI.ISO8859-15', - 'fil_ph': 'fil_PH.UTF-8', - 'finnish': 'fi_FI.ISO8859-1', - 'finnish.iso88591': 'fi_FI.ISO8859-1', - 'fo': 'fo_FO.ISO8859-1', - 'fo_fo': 'fo_FO.ISO8859-1', - 'fo_fo.iso88591': 'fo_FO.ISO8859-1', - 'fo_fo.iso885915': 'fo_FO.ISO8859-15', - 'fo_fo@euro': 'fo_FO.ISO8859-15', - 'fr': 'fr_FR.ISO8859-1', - 'fr.iso885915': 'fr_FR.ISO8859-15', - 'fr_be': 'fr_BE.ISO8859-1', - 'fr_be.88591': 'fr_BE.ISO8859-1', - 'fr_be.iso88591': 'fr_BE.ISO8859-1', - 'fr_be.iso885915': 'fr_BE.ISO8859-15', - 'fr_be.iso885915@euro': 'fr_BE.ISO8859-15', - 'fr_be.utf8@euro': 'fr_BE.UTF-8', - 'fr_be@euro': 'fr_BE.ISO8859-15', - 'fr_ca': 'fr_CA.ISO8859-1', - 'fr_ca.88591': 'fr_CA.ISO8859-1', - 'fr_ca.iso88591': 'fr_CA.ISO8859-1', - 'fr_ca.iso885915': 'fr_CA.ISO8859-15', - 'fr_ca@euro': 'fr_CA.ISO8859-15', - 'fr_ch': 'fr_CH.ISO8859-1', - 'fr_ch.88591': 'fr_CH.ISO8859-1', - 'fr_ch.iso88591': 'fr_CH.ISO8859-1', - 'fr_ch.iso885915': 'fr_CH.ISO8859-15', - 'fr_ch@euro': 'fr_CH.ISO8859-15', - 'fr_fr': 'fr_FR.ISO8859-1', - 'fr_fr.88591': 'fr_FR.ISO8859-1', - 'fr_fr.iso88591': 'fr_FR.ISO8859-1', - 'fr_fr.iso885915': 'fr_FR.ISO8859-15', - 'fr_fr.iso885915@euro': 'fr_FR.ISO8859-15', - 'fr_fr.utf8@euro': 'fr_FR.UTF-8', - 'fr_fr@euro': 'fr_FR.ISO8859-15', - 'fr_lu': 'fr_LU.ISO8859-1', - 'fr_lu.88591': 'fr_LU.ISO8859-1', - 'fr_lu.iso88591': 'fr_LU.ISO8859-1', - 'fr_lu.iso885915': 'fr_LU.ISO8859-15', - 'fr_lu.iso885915@euro': 'fr_LU.ISO8859-15', - 'fr_lu.utf8@euro': 'fr_LU.UTF-8', - 'fr_lu@euro': 'fr_LU.ISO8859-15', - 'fran\xe7ais': 'fr_FR.ISO8859-1', - 'fre_fr': 'fr_FR.ISO8859-1', - 'fre_fr.8859': 'fr_FR.ISO8859-1', - 'french': 'fr_FR.ISO8859-1', - 'french.iso88591': 'fr_CH.ISO8859-1', - 'french_france': 'fr_FR.ISO8859-1', - 'french_france.8859': 'fr_FR.ISO8859-1', - 'fur_it': 'fur_IT.UTF-8', - 'fy_de': 'fy_DE.UTF-8', - 'fy_nl': 'fy_NL.UTF-8', - 'ga': 'ga_IE.ISO8859-1', - 'ga_ie': 'ga_IE.ISO8859-1', - 'ga_ie.iso88591': 'ga_IE.ISO8859-1', - 'ga_ie.iso885914': 'ga_IE.ISO8859-14', - 'ga_ie.iso885915': 'ga_IE.ISO8859-15', - 'ga_ie.iso885915@euro': 'ga_IE.ISO8859-15', - 'ga_ie.utf8@euro': 'ga_IE.UTF-8', - 'ga_ie@euro': 'ga_IE.ISO8859-15', - 'galego': 'gl_ES.ISO8859-1', - 'galician': 'gl_ES.ISO8859-1', - 'gd': 'gd_GB.ISO8859-1', - 'gd_gb': 'gd_GB.ISO8859-1', - 'gd_gb.iso88591': 'gd_GB.ISO8859-1', - 'gd_gb.iso885914': 'gd_GB.ISO8859-14', - 'gd_gb.iso885915': 'gd_GB.ISO8859-15', - 'gd_gb@euro': 'gd_GB.ISO8859-15', - 'ger_de': 'de_DE.ISO8859-1', - 'ger_de.8859': 'de_DE.ISO8859-1', - 'german': 'de_DE.ISO8859-1', - 'german.iso88591': 'de_CH.ISO8859-1', - 'german_germany': 'de_DE.ISO8859-1', - 'german_germany.8859': 'de_DE.ISO8859-1', - 'gez_er': 'gez_ER.UTF-8', - 'gez_et': 'gez_ET.UTF-8', - 'gl': 'gl_ES.ISO8859-1', - 'gl_es': 'gl_ES.ISO8859-1', - 'gl_es.iso88591': 'gl_ES.ISO8859-1', - 'gl_es.iso885915': 'gl_ES.ISO8859-15', - 'gl_es.iso885915@euro': 'gl_ES.ISO8859-15', - 'gl_es.utf8@euro': 'gl_ES.UTF-8', - 'gl_es@euro': 'gl_ES.ISO8859-15', - 'greek': 'el_GR.ISO8859-7', - 'greek.iso88597': 'el_GR.ISO8859-7', - 'gu_in': 'gu_IN.UTF-8', - 'gv': 'gv_GB.ISO8859-1', - 'gv_gb': 'gv_GB.ISO8859-1', - 'gv_gb.iso88591': 'gv_GB.ISO8859-1', - 'gv_gb.iso885914': 'gv_GB.ISO8859-14', - 'gv_gb.iso885915': 'gv_GB.ISO8859-15', - 'gv_gb@euro': 'gv_GB.ISO8859-15', - 'ha_ng': 'ha_NG.UTF-8', - 'he': 'he_IL.ISO8859-8', - 'he_il': 'he_IL.ISO8859-8', - 'he_il.cp1255': 'he_IL.CP1255', - 'he_il.iso88598': 'he_IL.ISO8859-8', - 'he_il.microsoftcp1255': 'he_IL.CP1255', - 'hebrew': 'he_IL.ISO8859-8', - 'hebrew.iso88598': 'he_IL.ISO8859-8', - 'hi': 'hi_IN.ISCII-DEV', - 'hi_in': 'hi_IN.ISCII-DEV', - 'hi_in.isciidev': 'hi_IN.ISCII-DEV', - 'hne': 'hne_IN.UTF-8', - 'hne_in': 'hne_IN.UTF-8', - 'hr': 'hr_HR.ISO8859-2', - 'hr_hr': 'hr_HR.ISO8859-2', - 'hr_hr.iso88592': 'hr_HR.ISO8859-2', - 'hrvatski': 'hr_HR.ISO8859-2', - 'hsb_de': 'hsb_DE.ISO8859-2', - 'ht_ht': 'ht_HT.UTF-8', - 'hu': 'hu_HU.ISO8859-2', - 'hu_hu': 'hu_HU.ISO8859-2', - 'hu_hu.iso88592': 'hu_HU.ISO8859-2', - 'hungarian': 'hu_HU.ISO8859-2', - 'hy_am': 'hy_AM.UTF-8', - 'hy_am.armscii8': 'hy_AM.ARMSCII_8', - 'ia': 'ia.UTF-8', - 'ia_fr': 'ia_FR.UTF-8', - 'icelandic': 'is_IS.ISO8859-1', - 'icelandic.iso88591': 'is_IS.ISO8859-1', - 'id': 'id_ID.ISO8859-1', - 'id_id': 'id_ID.ISO8859-1', - 'ig_ng': 'ig_NG.UTF-8', - 'ik_ca': 'ik_CA.UTF-8', - 'in': 'id_ID.ISO8859-1', - 'in_id': 'id_ID.ISO8859-1', - 'is': 'is_IS.ISO8859-1', - 'is_is': 'is_IS.ISO8859-1', - 'is_is.iso88591': 'is_IS.ISO8859-1', - 'is_is.iso885915': 'is_IS.ISO8859-15', - 'is_is@euro': 'is_IS.ISO8859-15', - 'iso-8859-1': 'en_US.ISO8859-1', - 'iso-8859-15': 'en_US.ISO8859-15', - 'iso8859-1': 'en_US.ISO8859-1', - 'iso8859-15': 'en_US.ISO8859-15', - 'iso_8859_1': 'en_US.ISO8859-1', - 'iso_8859_15': 'en_US.ISO8859-15', - 'it': 'it_IT.ISO8859-1', - 'it.iso885915': 'it_IT.ISO8859-15', - 'it_ch': 'it_CH.ISO8859-1', - 'it_ch.iso88591': 'it_CH.ISO8859-1', - 'it_ch.iso885915': 'it_CH.ISO8859-15', - 'it_ch@euro': 'it_CH.ISO8859-15', - 'it_it': 'it_IT.ISO8859-1', - 'it_it.88591': 'it_IT.ISO8859-1', - 'it_it.iso88591': 'it_IT.ISO8859-1', - 'it_it.iso885915': 'it_IT.ISO8859-15', - 'it_it.iso885915@euro': 'it_IT.ISO8859-15', - 'it_it.utf8@euro': 'it_IT.UTF-8', - 'it_it@euro': 'it_IT.ISO8859-15', - 'italian': 'it_IT.ISO8859-1', - 'italian.iso88591': 'it_IT.ISO8859-1', - 'iu': 'iu_CA.NUNACOM-8', - 'iu_ca': 'iu_CA.NUNACOM-8', - 'iu_ca.nunacom8': 'iu_CA.NUNACOM-8', - 'iw': 'he_IL.ISO8859-8', - 'iw_il': 'he_IL.ISO8859-8', - 'iw_il.iso88598': 'he_IL.ISO8859-8', - 'iw_il.utf8': 'iw_IL.UTF-8', - 'ja': 'ja_JP.eucJP', - 'ja.jis': 'ja_JP.JIS7', - 'ja.sjis': 'ja_JP.SJIS', - 'ja_jp': 'ja_JP.eucJP', - 'ja_jp.ajec': 'ja_JP.eucJP', - 'ja_jp.euc': 'ja_JP.eucJP', - 'ja_jp.eucjp': 'ja_JP.eucJP', - 'ja_jp.iso-2022-jp': 'ja_JP.JIS7', - 'ja_jp.iso2022jp': 'ja_JP.JIS7', - 'ja_jp.jis': 'ja_JP.JIS7', - 'ja_jp.jis7': 'ja_JP.JIS7', - 'ja_jp.mscode': 'ja_JP.SJIS', - 'ja_jp.pck': 'ja_JP.SJIS', - 'ja_jp.sjis': 'ja_JP.SJIS', - 'ja_jp.ujis': 'ja_JP.eucJP', - 'japan': 'ja_JP.eucJP', - 'japanese': 'ja_JP.eucJP', - 'japanese-euc': 'ja_JP.eucJP', - 'japanese.euc': 'ja_JP.eucJP', - 'japanese.sjis': 'ja_JP.SJIS', - 'jp_jp': 'ja_JP.eucJP', - 'ka': 'ka_GE.GEORGIAN-ACADEMY', - 'ka_ge': 'ka_GE.GEORGIAN-ACADEMY', - 'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY', - 'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS', - 'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY', - 'kk_kz': 'kk_KZ.RK1048', - 'kl': 'kl_GL.ISO8859-1', - 'kl_gl': 'kl_GL.ISO8859-1', - 'kl_gl.iso88591': 'kl_GL.ISO8859-1', - 'kl_gl.iso885915': 'kl_GL.ISO8859-15', - 'kl_gl@euro': 'kl_GL.ISO8859-15', - 'km_kh': 'km_KH.UTF-8', - 'kn': 'kn_IN.UTF-8', - 'kn_in': 'kn_IN.UTF-8', - 'ko': 'ko_KR.eucKR', - 'ko_kr': 'ko_KR.eucKR', - 'ko_kr.euc': 'ko_KR.eucKR', - 'ko_kr.euckr': 'ko_KR.eucKR', - 'kok_in': 'kok_IN.UTF-8', - 'korean': 'ko_KR.eucKR', - 'korean.euc': 'ko_KR.eucKR', - 'ks': 'ks_IN.UTF-8', - 'ks_in': 'ks_IN.UTF-8', - 'ks_in@devanagari': 'ks_IN.UTF-8@devanagari', - 'ks_in@devanagari.utf8': 'ks_IN.UTF-8@devanagari', - 'ku_tr': 'ku_TR.ISO8859-9', - 'kw': 'kw_GB.ISO8859-1', - 'kw_gb': 'kw_GB.ISO8859-1', - 'kw_gb.iso88591': 'kw_GB.ISO8859-1', - 'kw_gb.iso885914': 'kw_GB.ISO8859-14', - 'kw_gb.iso885915': 'kw_GB.ISO8859-15', - 'kw_gb@euro': 'kw_GB.ISO8859-15', - 'ky': 'ky_KG.UTF-8', - 'ky_kg': 'ky_KG.UTF-8', - 'lb_lu': 'lb_LU.UTF-8', - 'lg_ug': 'lg_UG.ISO8859-10', - 'li_be': 'li_BE.UTF-8', - 'li_nl': 'li_NL.UTF-8', - 'lij_it': 'lij_IT.UTF-8', - 'lithuanian': 'lt_LT.ISO8859-13', - 'lo': 'lo_LA.MULELAO-1', - 'lo_la': 'lo_LA.MULELAO-1', - 'lo_la.cp1133': 'lo_LA.IBM-CP1133', - 'lo_la.ibmcp1133': 'lo_LA.IBM-CP1133', - 'lo_la.mulelao1': 'lo_LA.MULELAO-1', - 'lt': 'lt_LT.ISO8859-13', - 'lt_lt': 'lt_LT.ISO8859-13', - 'lt_lt.iso885913': 'lt_LT.ISO8859-13', - 'lt_lt.iso88594': 'lt_LT.ISO8859-4', - 'lv': 'lv_LV.ISO8859-13', - 'lv_lv': 'lv_LV.ISO8859-13', - 'lv_lv.iso885913': 'lv_LV.ISO8859-13', - 'lv_lv.iso88594': 'lv_LV.ISO8859-4', - 'mag_in': 'mag_IN.UTF-8', - 'mai': 'mai_IN.UTF-8', - 'mai_in': 'mai_IN.UTF-8', - 'mg_mg': 'mg_MG.ISO8859-15', - 'mhr_ru': 'mhr_RU.UTF-8', - 'mi': 'mi_NZ.ISO8859-1', - 'mi_nz': 'mi_NZ.ISO8859-1', - 'mi_nz.iso88591': 'mi_NZ.ISO8859-1', - 'mk': 'mk_MK.ISO8859-5', - 'mk_mk': 'mk_MK.ISO8859-5', - 'mk_mk.cp1251': 'mk_MK.CP1251', - 'mk_mk.iso88595': 'mk_MK.ISO8859-5', - 'mk_mk.microsoftcp1251': 'mk_MK.CP1251', - 'ml': 'ml_IN.UTF-8', - 'ml_in': 'ml_IN.UTF-8', - 'mn_mn': 'mn_MN.UTF-8', - 'mni_in': 'mni_IN.UTF-8', - 'mr': 'mr_IN.UTF-8', - 'mr_in': 'mr_IN.UTF-8', - 'ms': 'ms_MY.ISO8859-1', - 'ms_my': 'ms_MY.ISO8859-1', - 'ms_my.iso88591': 'ms_MY.ISO8859-1', - 'mt': 'mt_MT.ISO8859-3', - 'mt_mt': 'mt_MT.ISO8859-3', - 'mt_mt.iso88593': 'mt_MT.ISO8859-3', - 'my_mm': 'my_MM.UTF-8', - 'nan_tw@latin': 'nan_TW.UTF-8@latin', - 'nb': 'nb_NO.ISO8859-1', - 'nb_no': 'nb_NO.ISO8859-1', - 'nb_no.88591': 'nb_NO.ISO8859-1', - 'nb_no.iso88591': 'nb_NO.ISO8859-1', - 'nb_no.iso885915': 'nb_NO.ISO8859-15', - 'nb_no@euro': 'nb_NO.ISO8859-15', - 'nds_de': 'nds_DE.UTF-8', - 'nds_nl': 'nds_NL.UTF-8', - 'ne_np': 'ne_NP.UTF-8', - 'nhn_mx': 'nhn_MX.UTF-8', - 'niu_nu': 'niu_NU.UTF-8', - 'niu_nz': 'niu_NZ.UTF-8', - 'nl': 'nl_NL.ISO8859-1', - 'nl.iso885915': 'nl_NL.ISO8859-15', - 'nl_aw': 'nl_AW.UTF-8', - 'nl_be': 'nl_BE.ISO8859-1', - 'nl_be.88591': 'nl_BE.ISO8859-1', - 'nl_be.iso88591': 'nl_BE.ISO8859-1', - 'nl_be.iso885915': 'nl_BE.ISO8859-15', - 'nl_be.iso885915@euro': 'nl_BE.ISO8859-15', - 'nl_be.utf8@euro': 'nl_BE.UTF-8', - 'nl_be@euro': 'nl_BE.ISO8859-15', - 'nl_nl': 'nl_NL.ISO8859-1', - 'nl_nl.88591': 'nl_NL.ISO8859-1', - 'nl_nl.iso88591': 'nl_NL.ISO8859-1', - 'nl_nl.iso885915': 'nl_NL.ISO8859-15', - 'nl_nl.iso885915@euro': 'nl_NL.ISO8859-15', - 'nl_nl.utf8@euro': 'nl_NL.UTF-8', - 'nl_nl@euro': 'nl_NL.ISO8859-15', - 'nn': 'nn_NO.ISO8859-1', - 'nn_no': 'nn_NO.ISO8859-1', - 'nn_no.88591': 'nn_NO.ISO8859-1', - 'nn_no.iso88591': 'nn_NO.ISO8859-1', - 'nn_no.iso885915': 'nn_NO.ISO8859-15', - 'nn_no@euro': 'nn_NO.ISO8859-15', - 'no': 'no_NO.ISO8859-1', - 'no@nynorsk': 'ny_NO.ISO8859-1', - 'no_no': 'no_NO.ISO8859-1', - 'no_no.88591': 'no_NO.ISO8859-1', - 'no_no.iso88591': 'no_NO.ISO8859-1', - 'no_no.iso885915': 'no_NO.ISO8859-15', - 'no_no.iso88591@bokmal': 'no_NO.ISO8859-1', - 'no_no.iso88591@nynorsk': 'no_NO.ISO8859-1', - 'no_no@euro': 'no_NO.ISO8859-15', - 'norwegian': 'no_NO.ISO8859-1', - 'norwegian.iso88591': 'no_NO.ISO8859-1', - 'nr': 'nr_ZA.ISO8859-1', - 'nr_za': 'nr_ZA.ISO8859-1', - 'nr_za.iso88591': 'nr_ZA.ISO8859-1', - 'nso': 'nso_ZA.ISO8859-15', - 'nso_za': 'nso_ZA.ISO8859-15', - 'nso_za.iso885915': 'nso_ZA.ISO8859-15', - 'ny': 'ny_NO.ISO8859-1', - 'ny_no': 'ny_NO.ISO8859-1', - 'ny_no.88591': 'ny_NO.ISO8859-1', - 'ny_no.iso88591': 'ny_NO.ISO8859-1', - 'ny_no.iso885915': 'ny_NO.ISO8859-15', - 'ny_no@euro': 'ny_NO.ISO8859-15', - 'nynorsk': 'nn_NO.ISO8859-1', - 'oc': 'oc_FR.ISO8859-1', - 'oc_fr': 'oc_FR.ISO8859-1', - 'oc_fr.iso88591': 'oc_FR.ISO8859-1', - 'oc_fr.iso885915': 'oc_FR.ISO8859-15', - 'oc_fr@euro': 'oc_FR.ISO8859-15', - 'om_et': 'om_ET.UTF-8', - 'om_ke': 'om_KE.ISO8859-1', - 'or': 'or_IN.UTF-8', - 'or_in': 'or_IN.UTF-8', - 'os_ru': 'os_RU.UTF-8', - 'pa': 'pa_IN.UTF-8', - 'pa_in': 'pa_IN.UTF-8', - 'pa_pk': 'pa_PK.UTF-8', - 'pap_an': 'pap_AN.UTF-8', - 'pd': 'pd_US.ISO8859-1', - 'pd_de': 'pd_DE.ISO8859-1', - 'pd_de.iso88591': 'pd_DE.ISO8859-1', - 'pd_de.iso885915': 'pd_DE.ISO8859-15', - 'pd_de@euro': 'pd_DE.ISO8859-15', - 'pd_us': 'pd_US.ISO8859-1', - 'pd_us.iso88591': 'pd_US.ISO8859-1', - 'pd_us.iso885915': 'pd_US.ISO8859-15', - 'pd_us@euro': 'pd_US.ISO8859-15', - 'ph': 'ph_PH.ISO8859-1', - 'ph_ph': 'ph_PH.ISO8859-1', - 'ph_ph.iso88591': 'ph_PH.ISO8859-1', - 'pl': 'pl_PL.ISO8859-2', - 'pl_pl': 'pl_PL.ISO8859-2', - 'pl_pl.iso88592': 'pl_PL.ISO8859-2', - 'polish': 'pl_PL.ISO8859-2', - 'portuguese': 'pt_PT.ISO8859-1', - 'portuguese.iso88591': 'pt_PT.ISO8859-1', - 'portuguese_brazil': 'pt_BR.ISO8859-1', - 'portuguese_brazil.8859': 'pt_BR.ISO8859-1', - 'posix': 'C', - 'posix-utf2': 'C', - 'pp': 'pp_AN.ISO8859-1', - 'pp_an': 'pp_AN.ISO8859-1', - 'pp_an.iso88591': 'pp_AN.ISO8859-1', - 'ps_af': 'ps_AF.UTF-8', - 'pt': 'pt_PT.ISO8859-1', - 'pt.iso885915': 'pt_PT.ISO8859-15', - 'pt_br': 'pt_BR.ISO8859-1', - 'pt_br.88591': 'pt_BR.ISO8859-1', - 'pt_br.iso88591': 'pt_BR.ISO8859-1', - 'pt_br.iso885915': 'pt_BR.ISO8859-15', - 'pt_br@euro': 'pt_BR.ISO8859-15', - 'pt_pt': 'pt_PT.ISO8859-1', - 'pt_pt.88591': 'pt_PT.ISO8859-1', - 'pt_pt.iso88591': 'pt_PT.ISO8859-1', - 'pt_pt.iso885915': 'pt_PT.ISO8859-15', - 'pt_pt.iso885915@euro': 'pt_PT.ISO8859-15', - 'pt_pt.utf8@euro': 'pt_PT.UTF-8', - 'pt_pt@euro': 'pt_PT.ISO8859-15', - 'ro': 'ro_RO.ISO8859-2', - 'ro_ro': 'ro_RO.ISO8859-2', - 'ro_ro.iso88592': 'ro_RO.ISO8859-2', - 'romanian': 'ro_RO.ISO8859-2', - 'ru': 'ru_RU.UTF-8', - 'ru.koi8r': 'ru_RU.KOI8-R', - 'ru_ru': 'ru_RU.UTF-8', - 'ru_ru.cp1251': 'ru_RU.CP1251', - 'ru_ru.iso88595': 'ru_RU.ISO8859-5', - 'ru_ru.koi8r': 'ru_RU.KOI8-R', - 'ru_ru.microsoftcp1251': 'ru_RU.CP1251', - 'ru_ua': 'ru_UA.KOI8-U', - 'ru_ua.cp1251': 'ru_UA.CP1251', - 'ru_ua.koi8u': 'ru_UA.KOI8-U', - 'ru_ua.microsoftcp1251': 'ru_UA.CP1251', - 'rumanian': 'ro_RO.ISO8859-2', - 'russian': 'ru_RU.ISO8859-5', - 'rw': 'rw_RW.ISO8859-1', - 'rw_rw': 'rw_RW.ISO8859-1', - 'rw_rw.iso88591': 'rw_RW.ISO8859-1', - 'sa_in': 'sa_IN.UTF-8', - 'sat_in': 'sat_IN.UTF-8', - 'sc_it': 'sc_IT.UTF-8', - 'sd': 'sd_IN.UTF-8', - 'sd@devanagari': 'sd_IN.UTF-8@devanagari', - 'sd_in': 'sd_IN.UTF-8', - 'sd_in@devanagari': 'sd_IN.UTF-8@devanagari', - 'sd_in@devanagari.utf8': 'sd_IN.UTF-8@devanagari', - 'sd_pk': 'sd_PK.UTF-8', - 'se_no': 'se_NO.UTF-8', - 'serbocroatian': 'sr_RS.UTF-8@latin', - 'sh': 'sr_RS.UTF-8@latin', - 'sh_ba.iso88592@bosnia': 'sr_CS.ISO8859-2', - 'sh_hr': 'sh_HR.ISO8859-2', - 'sh_hr.iso88592': 'hr_HR.ISO8859-2', - 'sh_sp': 'sr_CS.ISO8859-2', - 'sh_yu': 'sr_RS.UTF-8@latin', - 'shs_ca': 'shs_CA.UTF-8', - 'si': 'si_LK.UTF-8', - 'si_lk': 'si_LK.UTF-8', - 'sid_et': 'sid_ET.UTF-8', - 'sinhala': 'si_LK.UTF-8', - 'sk': 'sk_SK.ISO8859-2', - 'sk_sk': 'sk_SK.ISO8859-2', - 'sk_sk.iso88592': 'sk_SK.ISO8859-2', - 'sl': 'sl_SI.ISO8859-2', - 'sl_cs': 'sl_CS.ISO8859-2', - 'sl_si': 'sl_SI.ISO8859-2', - 'sl_si.iso88592': 'sl_SI.ISO8859-2', - 'slovak': 'sk_SK.ISO8859-2', - 'slovene': 'sl_SI.ISO8859-2', - 'slovenian': 'sl_SI.ISO8859-2', - 'so_dj': 'so_DJ.ISO8859-1', - 'so_et': 'so_ET.UTF-8', - 'so_ke': 'so_KE.ISO8859-1', - 'so_so': 'so_SO.ISO8859-1', - 'sp': 'sr_CS.ISO8859-5', - 'sp_yu': 'sr_CS.ISO8859-5', - 'spanish': 'es_ES.ISO8859-1', - 'spanish.iso88591': 'es_ES.ISO8859-1', - 'spanish_spain': 'es_ES.ISO8859-1', - 'spanish_spain.8859': 'es_ES.ISO8859-1', - 'sq': 'sq_AL.ISO8859-2', - 'sq_al': 'sq_AL.ISO8859-2', - 'sq_al.iso88592': 'sq_AL.ISO8859-2', - 'sq_mk': 'sq_MK.UTF-8', - 'sr': 'sr_RS.UTF-8', - 'sr@cyrillic': 'sr_RS.UTF-8', - 'sr@latin': 'sr_RS.UTF-8@latin', - 'sr@latn': 'sr_CS.UTF-8@latin', - 'sr_cs': 'sr_CS.UTF-8', - 'sr_cs.iso88592': 'sr_CS.ISO8859-2', - 'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2', - 'sr_cs.iso88595': 'sr_CS.ISO8859-5', - 'sr_cs.utf8@latn': 'sr_CS.UTF-8@latin', - 'sr_cs@latn': 'sr_CS.UTF-8@latin', - 'sr_me': 'sr_ME.UTF-8', - 'sr_rs': 'sr_RS.UTF-8', - 'sr_rs@latin': 'sr_RS.UTF-8@latin', - 'sr_rs@latn': 'sr_RS.UTF-8@latin', - 'sr_sp': 'sr_CS.ISO8859-2', - 'sr_yu': 'sr_RS.UTF-8@latin', - 'sr_yu.cp1251@cyrillic': 'sr_CS.CP1251', - 'sr_yu.iso88592': 'sr_CS.ISO8859-2', - 'sr_yu.iso88595': 'sr_CS.ISO8859-5', - 'sr_yu.iso88595@cyrillic': 'sr_CS.ISO8859-5', - 'sr_yu.microsoftcp1251@cyrillic': 'sr_CS.CP1251', - 'sr_yu.utf8': 'sr_RS.UTF-8', - 'sr_yu.utf8@cyrillic': 'sr_RS.UTF-8', - 'sr_yu@cyrillic': 'sr_RS.UTF-8', - 'ss': 'ss_ZA.ISO8859-1', - 'ss_za': 'ss_ZA.ISO8859-1', - 'ss_za.iso88591': 'ss_ZA.ISO8859-1', - 'st': 'st_ZA.ISO8859-1', - 'st_za': 'st_ZA.ISO8859-1', - 'st_za.iso88591': 'st_ZA.ISO8859-1', - 'sv': 'sv_SE.ISO8859-1', - 'sv.iso885915': 'sv_SE.ISO8859-15', - 'sv_fi': 'sv_FI.ISO8859-1', - 'sv_fi.iso88591': 'sv_FI.ISO8859-1', - 'sv_fi.iso885915': 'sv_FI.ISO8859-15', - 'sv_fi.iso885915@euro': 'sv_FI.ISO8859-15', - 'sv_fi.utf8@euro': 'sv_FI.UTF-8', - 'sv_fi@euro': 'sv_FI.ISO8859-15', - 'sv_se': 'sv_SE.ISO8859-1', - 'sv_se.88591': 'sv_SE.ISO8859-1', - 'sv_se.iso88591': 'sv_SE.ISO8859-1', - 'sv_se.iso885915': 'sv_SE.ISO8859-15', - 'sv_se@euro': 'sv_SE.ISO8859-15', - 'sw_ke': 'sw_KE.UTF-8', - 'sw_tz': 'sw_TZ.UTF-8', - 'swedish': 'sv_SE.ISO8859-1', - 'swedish.iso88591': 'sv_SE.ISO8859-1', - 'szl_pl': 'szl_PL.UTF-8', - 'ta': 'ta_IN.TSCII-0', - 'ta_in': 'ta_IN.TSCII-0', - 'ta_in.tscii': 'ta_IN.TSCII-0', - 'ta_in.tscii0': 'ta_IN.TSCII-0', - 'ta_lk': 'ta_LK.UTF-8', - 'te': 'te_IN.UTF-8', - 'te_in': 'te_IN.UTF-8', - 'tg': 'tg_TJ.KOI8-C', - 'tg_tj': 'tg_TJ.KOI8-C', - 'tg_tj.koi8c': 'tg_TJ.KOI8-C', - 'th': 'th_TH.ISO8859-11', - 'th_th': 'th_TH.ISO8859-11', - 'th_th.iso885911': 'th_TH.ISO8859-11', - 'th_th.tactis': 'th_TH.TIS620', - 'th_th.tis620': 'th_TH.TIS620', - 'thai': 'th_TH.ISO8859-11', - 'ti_er': 'ti_ER.UTF-8', - 'ti_et': 'ti_ET.UTF-8', - 'tig_er': 'tig_ER.UTF-8', - 'tk_tm': 'tk_TM.UTF-8', - 'tl': 'tl_PH.ISO8859-1', - 'tl_ph': 'tl_PH.ISO8859-1', - 'tl_ph.iso88591': 'tl_PH.ISO8859-1', - 'tn': 'tn_ZA.ISO8859-15', - 'tn_za': 'tn_ZA.ISO8859-15', - 'tn_za.iso885915': 'tn_ZA.ISO8859-15', - 'tr': 'tr_TR.ISO8859-9', - 'tr_cy': 'tr_CY.ISO8859-9', - 'tr_tr': 'tr_TR.ISO8859-9', - 'tr_tr.iso88599': 'tr_TR.ISO8859-9', - 'ts': 'ts_ZA.ISO8859-1', - 'ts_za': 'ts_ZA.ISO8859-1', - 'ts_za.iso88591': 'ts_ZA.ISO8859-1', - 'tt': 'tt_RU.TATAR-CYR', - 'tt_ru': 'tt_RU.TATAR-CYR', - 'tt_ru.koi8c': 'tt_RU.KOI8-C', - 'tt_ru.tatarcyr': 'tt_RU.TATAR-CYR', - 'tt_ru@iqtelif': 'tt_RU.UTF-8@iqtelif', - 'turkish': 'tr_TR.ISO8859-9', - 'turkish.iso88599': 'tr_TR.ISO8859-9', - 'ug_cn': 'ug_CN.UTF-8', - 'uk': 'uk_UA.KOI8-U', - 'uk_ua': 'uk_UA.KOI8-U', - 'uk_ua.cp1251': 'uk_UA.CP1251', - 'uk_ua.iso88595': 'uk_UA.ISO8859-5', - 'uk_ua.koi8u': 'uk_UA.KOI8-U', - 'uk_ua.microsoftcp1251': 'uk_UA.CP1251', - 'univ': 'en_US.UTF-8', - 'universal': 'en_US.UTF-8', - 'universal.utf8@ucs4': 'en_US.UTF-8', - 'unm_us': 'unm_US.UTF-8', - 'ur': 'ur_PK.CP1256', - 'ur_in': 'ur_IN.UTF-8', - 'ur_pk': 'ur_PK.CP1256', - 'ur_pk.cp1256': 'ur_PK.CP1256', - 'ur_pk.microsoftcp1256': 'ur_PK.CP1256', - 'uz': 'uz_UZ.UTF-8', - 'uz_uz': 'uz_UZ.UTF-8', - 'uz_uz.iso88591': 'uz_UZ.ISO8859-1', - 'uz_uz.utf8@cyrillic': 'uz_UZ.UTF-8', - 'uz_uz@cyrillic': 'uz_UZ.UTF-8', - 've': 've_ZA.UTF-8', - 've_za': 've_ZA.UTF-8', - 'vi': 'vi_VN.TCVN', - 'vi_vn': 'vi_VN.TCVN', - 'vi_vn.tcvn': 'vi_VN.TCVN', - 'vi_vn.tcvn5712': 'vi_VN.TCVN', - 'vi_vn.viscii': 'vi_VN.VISCII', - 'vi_vn.viscii111': 'vi_VN.VISCII', - 'wa': 'wa_BE.ISO8859-1', - 'wa_be': 'wa_BE.ISO8859-1', - 'wa_be.iso88591': 'wa_BE.ISO8859-1', - 'wa_be.iso885915': 'wa_BE.ISO8859-15', - 'wa_be.iso885915@euro': 'wa_BE.ISO8859-15', - 'wa_be@euro': 'wa_BE.ISO8859-15', - 'wae_ch': 'wae_CH.UTF-8', - 'wal_et': 'wal_ET.UTF-8', - 'wo_sn': 'wo_SN.UTF-8', - 'xh': 'xh_ZA.ISO8859-1', - 'xh_za': 'xh_ZA.ISO8859-1', - 'xh_za.iso88591': 'xh_ZA.ISO8859-1', - 'yi': 'yi_US.CP1255', - 'yi_us': 'yi_US.CP1255', - 'yi_us.cp1255': 'yi_US.CP1255', - 'yi_us.microsoftcp1255': 'yi_US.CP1255', - 'yo_ng': 'yo_NG.UTF-8', - 'yue_hk': 'yue_HK.UTF-8', - 'zh': 'zh_CN.eucCN', - 'zh_cn': 'zh_CN.gb2312', - 'zh_cn.big5': 'zh_TW.big5', - 'zh_cn.euc': 'zh_CN.eucCN', - 'zh_cn.gb18030': 'zh_CN.gb18030', - 'zh_cn.gb2312': 'zh_CN.gb2312', - 'zh_cn.gbk': 'zh_CN.gbk', - 'zh_hk': 'zh_HK.big5hkscs', - 'zh_hk.big5': 'zh_HK.big5', - 'zh_hk.big5hk': 'zh_HK.big5hkscs', - 'zh_hk.big5hkscs': 'zh_HK.big5hkscs', - 'zh_sg': 'zh_SG.GB2312', - 'zh_sg.gbk': 'zh_SG.GBK', - 'zh_tw': 'zh_TW.big5', - 'zh_tw.big5': 'zh_TW.big5', - 'zh_tw.euc': 'zh_TW.eucTW', - 'zh_tw.euctw': 'zh_TW.eucTW', - 'zu': 'zu_ZA.ISO8859-1', - 'zu_za': 'zu_ZA.ISO8859-1', - 'zu_za.iso88591': 'zu_ZA.ISO8859-1', -} - -# -# This maps Windows language identifiers to locale strings. -# -# This list has been updated from -# http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp -# to include every locale up to Windows Vista. -# -# NOTE: this mapping is incomplete. If your language is missing, please -# submit a bug report to the Python bug tracker at http://bugs.python.org/ -# Make sure you include the missing language identifier and the suggested -# locale code. -# - -windows_locale = { - 0x0436: "af_ZA", # Afrikaans - 0x041c: "sq_AL", # Albanian - 0x0484: "gsw_FR",# Alsatian - France - 0x045e: "am_ET", # Amharic - Ethiopia - 0x0401: "ar_SA", # Arabic - Saudi Arabia - 0x0801: "ar_IQ", # Arabic - Iraq - 0x0c01: "ar_EG", # Arabic - Egypt - 0x1001: "ar_LY", # Arabic - Libya - 0x1401: "ar_DZ", # Arabic - Algeria - 0x1801: "ar_MA", # Arabic - Morocco - 0x1c01: "ar_TN", # Arabic - Tunisia - 0x2001: "ar_OM", # Arabic - Oman - 0x2401: "ar_YE", # Arabic - Yemen - 0x2801: "ar_SY", # Arabic - Syria - 0x2c01: "ar_JO", # Arabic - Jordan - 0x3001: "ar_LB", # Arabic - Lebanon - 0x3401: "ar_KW", # Arabic - Kuwait - 0x3801: "ar_AE", # Arabic - United Arab Emirates - 0x3c01: "ar_BH", # Arabic - Bahrain - 0x4001: "ar_QA", # Arabic - Qatar - 0x042b: "hy_AM", # Armenian - 0x044d: "as_IN", # Assamese - India - 0x042c: "az_AZ", # Azeri - Latin - 0x082c: "az_AZ", # Azeri - Cyrillic - 0x046d: "ba_RU", # Bashkir - 0x042d: "eu_ES", # Basque - Russia - 0x0423: "be_BY", # Belarusian - 0x0445: "bn_IN", # Begali - 0x201a: "bs_BA", # Bosnian - Cyrillic - 0x141a: "bs_BA", # Bosnian - Latin - 0x047e: "br_FR", # Breton - France - 0x0402: "bg_BG", # Bulgarian -# 0x0455: "my_MM", # Burmese - Not supported - 0x0403: "ca_ES", # Catalan - 0x0004: "zh_CHS",# Chinese - Simplified - 0x0404: "zh_TW", # Chinese - Taiwan - 0x0804: "zh_CN", # Chinese - PRC - 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R. - 0x1004: "zh_SG", # Chinese - Singapore - 0x1404: "zh_MO", # Chinese - Macao S.A.R. - 0x7c04: "zh_CHT",# Chinese - Traditional - 0x0483: "co_FR", # Corsican - France - 0x041a: "hr_HR", # Croatian - 0x101a: "hr_BA", # Croatian - Bosnia - 0x0405: "cs_CZ", # Czech - 0x0406: "da_DK", # Danish - 0x048c: "gbz_AF",# Dari - Afghanistan - 0x0465: "div_MV",# Divehi - Maldives - 0x0413: "nl_NL", # Dutch - The Netherlands - 0x0813: "nl_BE", # Dutch - Belgium - 0x0409: "en_US", # English - United States - 0x0809: "en_GB", # English - United Kingdom - 0x0c09: "en_AU", # English - Australia - 0x1009: "en_CA", # English - Canada - 0x1409: "en_NZ", # English - New Zealand - 0x1809: "en_IE", # English - Ireland - 0x1c09: "en_ZA", # English - South Africa - 0x2009: "en_JA", # English - Jamaica - 0x2409: "en_CB", # English - Caribbean - 0x2809: "en_BZ", # English - Belize - 0x2c09: "en_TT", # English - Trinidad - 0x3009: "en_ZW", # English - Zimbabwe - 0x3409: "en_PH", # English - Philippines - 0x4009: "en_IN", # English - India - 0x4409: "en_MY", # English - Malaysia - 0x4809: "en_IN", # English - Singapore - 0x0425: "et_EE", # Estonian - 0x0438: "fo_FO", # Faroese - 0x0464: "fil_PH",# Filipino - 0x040b: "fi_FI", # Finnish - 0x040c: "fr_FR", # French - France - 0x080c: "fr_BE", # French - Belgium - 0x0c0c: "fr_CA", # French - Canada - 0x100c: "fr_CH", # French - Switzerland - 0x140c: "fr_LU", # French - Luxembourg - 0x180c: "fr_MC", # French - Monaco - 0x0462: "fy_NL", # Frisian - Netherlands - 0x0456: "gl_ES", # Galician - 0x0437: "ka_GE", # Georgian - 0x0407: "de_DE", # German - Germany - 0x0807: "de_CH", # German - Switzerland - 0x0c07: "de_AT", # German - Austria - 0x1007: "de_LU", # German - Luxembourg - 0x1407: "de_LI", # German - Liechtenstein - 0x0408: "el_GR", # Greek - 0x046f: "kl_GL", # Greenlandic - Greenland - 0x0447: "gu_IN", # Gujarati - 0x0468: "ha_NG", # Hausa - Latin - 0x040d: "he_IL", # Hebrew - 0x0439: "hi_IN", # Hindi - 0x040e: "hu_HU", # Hungarian - 0x040f: "is_IS", # Icelandic - 0x0421: "id_ID", # Indonesian - 0x045d: "iu_CA", # Inuktitut - Syllabics - 0x085d: "iu_CA", # Inuktitut - Latin - 0x083c: "ga_IE", # Irish - Ireland - 0x0410: "it_IT", # Italian - Italy - 0x0810: "it_CH", # Italian - Switzerland - 0x0411: "ja_JP", # Japanese - 0x044b: "kn_IN", # Kannada - India - 0x043f: "kk_KZ", # Kazakh - 0x0453: "kh_KH", # Khmer - Cambodia - 0x0486: "qut_GT",# K'iche - Guatemala - 0x0487: "rw_RW", # Kinyarwanda - Rwanda - 0x0457: "kok_IN",# Konkani - 0x0412: "ko_KR", # Korean - 0x0440: "ky_KG", # Kyrgyz - 0x0454: "lo_LA", # Lao - Lao PDR - 0x0426: "lv_LV", # Latvian - 0x0427: "lt_LT", # Lithuanian - 0x082e: "dsb_DE",# Lower Sorbian - Germany - 0x046e: "lb_LU", # Luxembourgish - 0x042f: "mk_MK", # FYROM Macedonian - 0x043e: "ms_MY", # Malay - Malaysia - 0x083e: "ms_BN", # Malay - Brunei Darussalam - 0x044c: "ml_IN", # Malayalam - India - 0x043a: "mt_MT", # Maltese - 0x0481: "mi_NZ", # Maori - 0x047a: "arn_CL",# Mapudungun - 0x044e: "mr_IN", # Marathi - 0x047c: "moh_CA",# Mohawk - Canada - 0x0450: "mn_MN", # Mongolian - Cyrillic - 0x0850: "mn_CN", # Mongolian - PRC - 0x0461: "ne_NP", # Nepali - 0x0414: "nb_NO", # Norwegian - Bokmal - 0x0814: "nn_NO", # Norwegian - Nynorsk - 0x0482: "oc_FR", # Occitan - France - 0x0448: "or_IN", # Oriya - India - 0x0463: "ps_AF", # Pashto - Afghanistan - 0x0429: "fa_IR", # Persian - 0x0415: "pl_PL", # Polish - 0x0416: "pt_BR", # Portuguese - Brazil - 0x0816: "pt_PT", # Portuguese - Portugal - 0x0446: "pa_IN", # Punjabi - 0x046b: "quz_BO",# Quechua (Bolivia) - 0x086b: "quz_EC",# Quechua (Ecuador) - 0x0c6b: "quz_PE",# Quechua (Peru) - 0x0418: "ro_RO", # Romanian - Romania - 0x0417: "rm_CH", # Romansh - 0x0419: "ru_RU", # Russian - 0x243b: "smn_FI",# Sami Finland - 0x103b: "smj_NO",# Sami Norway - 0x143b: "smj_SE",# Sami Sweden - 0x043b: "se_NO", # Sami Northern Norway - 0x083b: "se_SE", # Sami Northern Sweden - 0x0c3b: "se_FI", # Sami Northern Finland - 0x203b: "sms_FI",# Sami Skolt - 0x183b: "sma_NO",# Sami Southern Norway - 0x1c3b: "sma_SE",# Sami Southern Sweden - 0x044f: "sa_IN", # Sanskrit - 0x0c1a: "sr_SP", # Serbian - Cyrillic - 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic - 0x081a: "sr_SP", # Serbian - Latin - 0x181a: "sr_BA", # Serbian - Bosnia Latin - 0x045b: "si_LK", # Sinhala - Sri Lanka - 0x046c: "ns_ZA", # Northern Sotho - 0x0432: "tn_ZA", # Setswana - Southern Africa - 0x041b: "sk_SK", # Slovak - 0x0424: "sl_SI", # Slovenian - 0x040a: "es_ES", # Spanish - Spain - 0x080a: "es_MX", # Spanish - Mexico - 0x0c0a: "es_ES", # Spanish - Spain (Modern) - 0x100a: "es_GT", # Spanish - Guatemala - 0x140a: "es_CR", # Spanish - Costa Rica - 0x180a: "es_PA", # Spanish - Panama - 0x1c0a: "es_DO", # Spanish - Dominican Republic - 0x200a: "es_VE", # Spanish - Venezuela - 0x240a: "es_CO", # Spanish - Colombia - 0x280a: "es_PE", # Spanish - Peru - 0x2c0a: "es_AR", # Spanish - Argentina - 0x300a: "es_EC", # Spanish - Ecuador - 0x340a: "es_CL", # Spanish - Chile - 0x380a: "es_UR", # Spanish - Uruguay - 0x3c0a: "es_PY", # Spanish - Paraguay - 0x400a: "es_BO", # Spanish - Bolivia - 0x440a: "es_SV", # Spanish - El Salvador - 0x480a: "es_HN", # Spanish - Honduras - 0x4c0a: "es_NI", # Spanish - Nicaragua - 0x500a: "es_PR", # Spanish - Puerto Rico - 0x540a: "es_US", # Spanish - United States -# 0x0430: "", # Sutu - Not supported - 0x0441: "sw_KE", # Swahili - 0x041d: "sv_SE", # Swedish - Sweden - 0x081d: "sv_FI", # Swedish - Finland - 0x045a: "syr_SY",# Syriac - 0x0428: "tg_TJ", # Tajik - Cyrillic - 0x085f: "tmz_DZ",# Tamazight - Latin - 0x0449: "ta_IN", # Tamil - 0x0444: "tt_RU", # Tatar - 0x044a: "te_IN", # Telugu - 0x041e: "th_TH", # Thai - 0x0851: "bo_BT", # Tibetan - Bhutan - 0x0451: "bo_CN", # Tibetan - PRC - 0x041f: "tr_TR", # Turkish - 0x0442: "tk_TM", # Turkmen - Cyrillic - 0x0480: "ug_CN", # Uighur - Arabic - 0x0422: "uk_UA", # Ukrainian - 0x042e: "wen_DE",# Upper Sorbian - Germany - 0x0420: "ur_PK", # Urdu - 0x0820: "ur_IN", # Urdu - India - 0x0443: "uz_UZ", # Uzbek - Latin - 0x0843: "uz_UZ", # Uzbek - Cyrillic - 0x042a: "vi_VN", # Vietnamese - 0x0452: "cy_GB", # Welsh - 0x0488: "wo_SN", # Wolof - Senegal - 0x0434: "xh_ZA", # Xhosa - South Africa - 0x0485: "sah_RU",# Yakut - Cyrillic - 0x0478: "ii_CN", # Yi - PRC - 0x046a: "yo_NG", # Yoruba - Nigeria - 0x0435: "zu_ZA", # Zulu -} - -def _print_locale(): - - """ Test function. - """ - categories = {} - def _init_categories(categories=categories): - for k,v in globals().items(): - if k[:3] == 'LC_': - categories[k] = v - _init_categories() - del categories['LC_ALL'] - - print 'Locale defaults as determined by getdefaultlocale():' - print '-'*72 - lang, enc = getdefaultlocale() - print 'Language: ', lang or '(undefined)' - print 'Encoding: ', enc or '(undefined)' - print - - print 'Locale settings on startup:' - print '-'*72 - for name,category in categories.items(): - print name, '...' - lang, enc = getlocale(category) - print ' Language: ', lang or '(undefined)' - print ' Encoding: ', enc or '(undefined)' - print - - print - print 'Locale settings after calling resetlocale():' - print '-'*72 - resetlocale() - for name,category in categories.items(): - print name, '...' - lang, enc = getlocale(category) - print ' Language: ', lang or '(undefined)' - print ' Encoding: ', enc or '(undefined)' - print - - try: - setlocale(LC_ALL, "") - except: - print 'NOTE:' - print 'setlocale(LC_ALL, "") does not support the default locale' - print 'given in the OS environment variables.' - else: - print - print 'Locale settings after calling setlocale(LC_ALL, ""):' - print '-'*72 - for name,category in categories.items(): - print name, '...' - lang, enc = getlocale(category) - print ' Language: ', lang or '(undefined)' - print ' Encoding: ', enc or '(undefined)' - print - -### - -try: - LC_MESSAGES -except NameError: - pass -else: - __all__.append("LC_MESSAGES") - -if __name__=='__main__': - print 'Locale aliasing:' - print - _print_locale() - print - print 'Number formatting:' - print - _test()
--- a/test/lib/python2.7/ntpath.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,550 +0,0 @@ -# Module 'ntpath' -- common operations on WinNT/Win95 pathnames -"""Common pathname manipulations, WindowsNT/95 version. - -Instead of importing this module directly, import os and refer to this -module as os.path. -""" - -import os -import sys -import stat -import genericpath -import warnings - -from genericpath import * -from genericpath import _unicode - -__all__ = ["normcase","isabs","join","splitdrive","split","splitext", - "basename","dirname","commonprefix","getsize","getmtime", - "getatime","getctime", "islink","exists","lexists","isdir","isfile", - "ismount","walk","expanduser","expandvars","normpath","abspath", - "splitunc","curdir","pardir","sep","pathsep","defpath","altsep", - "extsep","devnull","realpath","supports_unicode_filenames","relpath"] - -# strings representing various path-related bits and pieces -curdir = '.' -pardir = '..' -extsep = '.' -sep = '\\' -pathsep = ';' -altsep = '/' -defpath = '.;C:\\bin' -if 'ce' in sys.builtin_module_names: - defpath = '\\Windows' -elif 'os2' in sys.builtin_module_names: - # OS/2 w/ VACPP - altsep = '/' -devnull = 'nul' - -# Normalize the case of a pathname and map slashes to backslashes. -# Other normalizations (such as optimizing '../' away) are not done -# (this is done by normpath). - -def normcase(s): - """Normalize case of pathname. - - Makes all characters lowercase and all slashes into backslashes.""" - return s.replace("/", "\\").lower() - - -# Return whether a path is absolute. -# Trivial in Posix, harder on the Mac or MS-DOS. -# For DOS it is absolute if it starts with a slash or backslash (current -# volume), or if a pathname after the volume letter and colon / UNC resource -# starts with a slash or backslash. - -def isabs(s): - """Test whether a path is absolute""" - s = splitdrive(s)[1] - return s != '' and s[:1] in '/\\' - - -# Join two (or more) paths. -def join(path, *paths): - """Join two or more pathname components, inserting "\\" as needed.""" - result_drive, result_path = splitdrive(path) - for p in paths: - p_drive, p_path = splitdrive(p) - if p_path and p_path[0] in '\\/': - # Second path is absolute - if p_drive or not result_drive: - result_drive = p_drive - result_path = p_path - continue - elif p_drive and p_drive != result_drive: - if p_drive.lower() != result_drive.lower(): - # Different drives => ignore the first path entirely - result_drive = p_drive - result_path = p_path - continue - # Same drive in different case - result_drive = p_drive - # Second path is relative to the first - if result_path and result_path[-1] not in '\\/': - result_path = result_path + '\\' - result_path = result_path + p_path - ## add separator between UNC and non-absolute path - if (result_path and result_path[0] not in '\\/' and - result_drive and result_drive[-1:] != ':'): - return result_drive + sep + result_path - return result_drive + result_path - - -# Split a path in a drive specification (a drive letter followed by a -# colon) and the path specification. -# It is always true that drivespec + pathspec == p -def splitdrive(p): - """Split a pathname into drive/UNC sharepoint and relative path specifiers. - Returns a 2-tuple (drive_or_unc, path); either part may be empty. - - If you assign - result = splitdrive(p) - It is always true that: - result[0] + result[1] == p - - If the path contained a drive letter, drive_or_unc will contain everything - up to and including the colon. e.g. splitdrive("c:/dir") returns ("c:", "/dir") - - If the path contained a UNC path, the drive_or_unc will contain the host name - and share up to but not including the fourth directory separator character. - e.g. splitdrive("//host/computer/dir") returns ("//host/computer", "/dir") - - Paths cannot contain both a drive letter and a UNC path. - - """ - if len(p) > 1: - normp = p.replace(altsep, sep) - if (normp[0:2] == sep*2) and (normp[2:3] != sep): - # is a UNC path: - # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path - # \\machine\mountpoint\directory\etc\... - # directory ^^^^^^^^^^^^^^^ - index = normp.find(sep, 2) - if index == -1: - return '', p - index2 = normp.find(sep, index + 1) - # a UNC path can't have two slashes in a row - # (after the initial two) - if index2 == index + 1: - return '', p - if index2 == -1: - index2 = len(p) - return p[:index2], p[index2:] - if normp[1] == ':': - return p[:2], p[2:] - return '', p - -# Parse UNC paths -def splitunc(p): - """Split a pathname into UNC mount point and relative path specifiers. - - Return a 2-tuple (unc, rest); either part may be empty. - If unc is not empty, it has the form '//host/mount' (or similar - using backslashes). unc+rest is always the input path. - Paths containing drive letters never have a UNC part. - """ - if p[1:2] == ':': - return '', p # Drive letter present - firstTwo = p[0:2] - if firstTwo == '//' or firstTwo == '\\\\': - # is a UNC path: - # vvvvvvvvvvvvvvvvvvvv equivalent to drive letter - # \\machine\mountpoint\directories... - # directory ^^^^^^^^^^^^^^^ - normp = p.replace('\\', '/') - index = normp.find('/', 2) - if index <= 2: - return '', p - index2 = normp.find('/', index + 1) - # a UNC path can't have two slashes in a row - # (after the initial two) - if index2 == index + 1: - return '', p - if index2 == -1: - index2 = len(p) - return p[:index2], p[index2:] - return '', p - - -# Split a path in head (everything up to the last '/') and tail (the -# rest). After the trailing '/' is stripped, the invariant -# join(head, tail) == p holds. -# The resulting head won't end in '/' unless it is the root. - -def split(p): - """Split a pathname. - - Return tuple (head, tail) where tail is everything after the final slash. - Either part may be empty.""" - - d, p = splitdrive(p) - # set i to index beyond p's last slash - i = len(p) - while i and p[i-1] not in '/\\': - i = i - 1 - head, tail = p[:i], p[i:] # now tail has no slashes - # remove trailing slashes from head, unless it's all slashes - head2 = head - while head2 and head2[-1] in '/\\': - head2 = head2[:-1] - head = head2 or head - return d + head, tail - - -# Split a path in root and extension. -# The extension is everything starting at the last dot in the last -# pathname component; the root is everything before that. -# It is always true that root + ext == p. - -def splitext(p): - return genericpath._splitext(p, sep, altsep, extsep) -splitext.__doc__ = genericpath._splitext.__doc__ - - -# Return the tail (basename) part of a path. - -def basename(p): - """Returns the final component of a pathname""" - return split(p)[1] - - -# Return the head (dirname) part of a path. - -def dirname(p): - """Returns the directory component of a pathname""" - return split(p)[0] - -# Is a path a symbolic link? -# This will always return false on systems where posix.lstat doesn't exist. - -def islink(path): - """Test for symbolic link. - On WindowsNT/95 and OS/2 always returns false - """ - return False - -# alias exists to lexists -lexists = exists - -# Is a path a mount point? Either a root (with or without drive letter) -# or a UNC path with at most a / or \ after the mount point. - -def ismount(path): - """Test whether a path is a mount point (defined as root of drive)""" - unc, rest = splitunc(path) - if unc: - return rest in ("", "/", "\\") - p = splitdrive(path)[1] - return len(p) == 1 and p[0] in '/\\' - - -# Directory tree walk. -# For each directory under top (including top itself, but excluding -# '.' and '..'), func(arg, dirname, filenames) is called, where -# dirname is the name of the directory and filenames is the list -# of files (and subdirectories etc.) in the directory. -# The func may modify the filenames list, to implement a filter, -# or to impose a different order of visiting. - -def walk(top, func, arg): - """Directory tree walk with callback function. - - For each directory in the directory tree rooted at top (including top - itself, but excluding '.' and '..'), call func(arg, dirname, fnames). - dirname is the name of the directory, and fnames a list of the names of - the files and subdirectories in dirname (excluding '.' and '..'). func - may modify the fnames list in-place (e.g. via del or slice assignment), - and walk will only recurse into the subdirectories whose names remain in - fnames; this can be used to implement a filter, or to impose a specific - order of visiting. No semantics are defined for, or required of, arg, - beyond that arg is always passed to func. It can be used, e.g., to pass - a filename pattern, or a mutable object designed to accumulate - statistics. Passing None for arg is common.""" - warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.", - stacklevel=2) - try: - names = os.listdir(top) - except os.error: - return - func(arg, top, names) - for name in names: - name = join(top, name) - if isdir(name): - walk(name, func, arg) - - -# Expand paths beginning with '~' or '~user'. -# '~' means $HOME; '~user' means that user's home directory. -# If the path doesn't begin with '~', or if the user or $HOME is unknown, -# the path is returned unchanged (leaving error reporting to whatever -# function is called with the expanded path as argument). -# See also module 'glob' for expansion of *, ? and [...] in pathnames. -# (A function should also be defined to do full *sh-style environment -# variable expansion.) - -def expanduser(path): - """Expand ~ and ~user constructs. - - If user or $HOME is unknown, do nothing.""" - if path[:1] != '~': - return path - i, n = 1, len(path) - while i < n and path[i] not in '/\\': - i = i + 1 - - if 'HOME' in os.environ: - userhome = os.environ['HOME'] - elif 'USERPROFILE' in os.environ: - userhome = os.environ['USERPROFILE'] - elif not 'HOMEPATH' in os.environ: - return path - else: - try: - drive = os.environ['HOMEDRIVE'] - except KeyError: - drive = '' - userhome = join(drive, os.environ['HOMEPATH']) - - if i != 1: #~user - userhome = join(dirname(userhome), path[1:i]) - - return userhome + path[i:] - - -# Expand paths containing shell variable substitutions. -# The following rules apply: -# - no expansion within single quotes -# - '$$' is translated into '$' -# - '%%' is translated into '%' if '%%' are not seen in %var1%%var2% -# - ${varname} is accepted. -# - $varname is accepted. -# - %varname% is accepted. -# - varnames can be made out of letters, digits and the characters '_-' -# (though is not verified in the ${varname} and %varname% cases) -# XXX With COMMAND.COM you can use any characters in a variable name, -# XXX except '^|<>='. - -def expandvars(path): - """Expand shell variables of the forms $var, ${var} and %var%. - - Unknown variables are left unchanged.""" - if '$' not in path and '%' not in path: - return path - import string - varchars = string.ascii_letters + string.digits + '_-' - if isinstance(path, _unicode): - encoding = sys.getfilesystemencoding() - def getenv(var): - return os.environ[var.encode(encoding)].decode(encoding) - else: - def getenv(var): - return os.environ[var] - res = '' - index = 0 - pathlen = len(path) - while index < pathlen: - c = path[index] - if c == '\'': # no expansion within single quotes - path = path[index + 1:] - pathlen = len(path) - try: - index = path.index('\'') - res = res + '\'' + path[:index + 1] - except ValueError: - res = res + c + path - index = pathlen - 1 - elif c == '%': # variable or '%' - if path[index + 1:index + 2] == '%': - res = res + c - index = index + 1 - else: - path = path[index+1:] - pathlen = len(path) - try: - index = path.index('%') - except ValueError: - res = res + '%' + path - index = pathlen - 1 - else: - var = path[:index] - try: - res = res + getenv(var) - except KeyError: - res = res + '%' + var + '%' - elif c == '$': # variable or '$$' - if path[index + 1:index + 2] == '$': - res = res + c - index = index + 1 - elif path[index + 1:index + 2] == '{': - path = path[index+2:] - pathlen = len(path) - try: - index = path.index('}') - var = path[:index] - try: - res = res + getenv(var) - except KeyError: - res = res + '${' + var + '}' - except ValueError: - res = res + '${' + path - index = pathlen - 1 - else: - var = '' - index = index + 1 - c = path[index:index + 1] - while c != '' and c in varchars: - var = var + c - index = index + 1 - c = path[index:index + 1] - try: - res = res + getenv(var) - except KeyError: - res = res + '$' + var - if c != '': - index = index - 1 - else: - res = res + c - index = index + 1 - return res - - -# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A\B. -# Previously, this function also truncated pathnames to 8+3 format, -# but as this module is called "ntpath", that's obviously wrong! - -def normpath(path): - """Normalize path, eliminating double slashes, etc.""" - # Preserve unicode (if path is unicode) - backslash, dot = (u'\\', u'.') if isinstance(path, _unicode) else ('\\', '.') - if path.startswith(('\\\\.\\', '\\\\?\\')): - # in the case of paths with these prefixes: - # \\.\ -> device names - # \\?\ -> literal paths - # do not do any normalization, but return the path unchanged - return path - path = path.replace("/", "\\") - prefix, path = splitdrive(path) - # We need to be careful here. If the prefix is empty, and the path starts - # with a backslash, it could either be an absolute path on the current - # drive (\dir1\dir2\file) or a UNC filename (\\server\mount\dir1\file). It - # is therefore imperative NOT to collapse multiple backslashes blindly in - # that case. - # The code below preserves multiple backslashes when there is no drive - # letter. This means that the invalid filename \\\a\b is preserved - # unchanged, where a\\\b is normalised to a\b. It's not clear that there - # is any better behaviour for such edge cases. - if prefix == '': - # No drive letter - preserve initial backslashes - while path[:1] == "\\": - prefix = prefix + backslash - path = path[1:] - else: - # We have a drive letter - collapse initial backslashes - if path.startswith("\\"): - prefix = prefix + backslash - path = path.lstrip("\\") - comps = path.split("\\") - i = 0 - while i < len(comps): - if comps[i] in ('.', ''): - del comps[i] - elif comps[i] == '..': - if i > 0 and comps[i-1] != '..': - del comps[i-1:i+1] - i -= 1 - elif i == 0 and prefix.endswith("\\"): - del comps[i] - else: - i += 1 - else: - i += 1 - # If the path is now empty, substitute '.' - if not prefix and not comps: - comps.append(dot) - return prefix + backslash.join(comps) - - -# Return an absolute path. -try: - from nt import _getfullpathname - -except ImportError: # not running on Windows - mock up something sensible - def abspath(path): - """Return the absolute version of a path.""" - if not isabs(path): - if isinstance(path, _unicode): - cwd = os.getcwdu() - else: - cwd = os.getcwd() - path = join(cwd, path) - return normpath(path) - -else: # use native Windows method on Windows - def abspath(path): - """Return the absolute version of a path.""" - - if path: # Empty path must return current working directory. - try: - path = _getfullpathname(path) - except WindowsError: - pass # Bad path - return unchanged. - elif isinstance(path, _unicode): - path = os.getcwdu() - else: - path = os.getcwd() - return normpath(path) - -# realpath is a no-op on systems without islink support -realpath = abspath -# Win9x family and earlier have no Unicode filename support. -supports_unicode_filenames = (hasattr(sys, "getwindowsversion") and - sys.getwindowsversion()[3] >= 2) - -def _abspath_split(path): - abs = abspath(normpath(path)) - prefix, rest = splitunc(abs) - is_unc = bool(prefix) - if not is_unc: - prefix, rest = splitdrive(abs) - return is_unc, prefix, [x for x in rest.split(sep) if x] - -def relpath(path, start=curdir): - """Return a relative version of a path""" - - if not path: - raise ValueError("no path specified") - - start_is_unc, start_prefix, start_list = _abspath_split(start) - path_is_unc, path_prefix, path_list = _abspath_split(path) - - if path_is_unc ^ start_is_unc: - raise ValueError("Cannot mix UNC and non-UNC paths (%s and %s)" - % (path, start)) - if path_prefix.lower() != start_prefix.lower(): - if path_is_unc: - raise ValueError("path is on UNC root %s, start on UNC root %s" - % (path_prefix, start_prefix)) - else: - raise ValueError("path is on drive %s, start on drive %s" - % (path_prefix, start_prefix)) - # Work out how much of the filepath is shared by start and path. - i = 0 - for e1, e2 in zip(start_list, path_list): - if e1.lower() != e2.lower(): - break - i += 1 - - rel_list = [pardir] * (len(start_list)-i) + path_list[i:] - if not rel_list: - return curdir - return join(*rel_list) - -try: - # The genericpath.isdir implementation uses os.stat and checks the mode - # attribute to tell whether or not the path is a directory. - # This is overkill on Windows - just pass the path to GetFileAttributes - # and check the attribute from there. - from nt import _isdir as isdir -except ImportError: - # Use genericpath.isdir as imported above. - pass
--- a/test/lib/python2.7/orig-prefix.txt Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,1 +0,0 @@ -/usr \ No newline at end of file
--- a/test/lib/python2.7/os.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,742 +0,0 @@ -r"""OS routines for NT or Posix depending on what system we're on. - -This exports: - - all functions from posix, nt, os2, or ce, e.g. unlink, stat, etc. - - os.path is one of the modules posixpath, or ntpath - - os.name is 'posix', 'nt', 'os2', 'ce' or 'riscos' - - os.curdir is a string representing the current directory ('.' or ':') - - os.pardir is a string representing the parent directory ('..' or '::') - - os.sep is the (or a most common) pathname separator ('/' or ':' or '\\') - - os.extsep is the extension separator ('.' or '/') - - os.altsep is the alternate pathname separator (None or '/') - - os.pathsep is the component separator used in $PATH etc - - os.linesep is the line separator in text files ('\r' or '\n' or '\r\n') - - os.defpath is the default search path for executables - - os.devnull is the file path of the null device ('/dev/null', etc.) - -Programs that import and use 'os' stand a better chance of being -portable between different platforms. Of course, they must then -only use functions that are defined by all platforms (e.g., unlink -and opendir), and leave all pathname manipulation to os.path -(e.g., split and join). -""" - -#' - -import sys, errno - -_names = sys.builtin_module_names - -# Note: more names are added to __all__ later. -__all__ = ["altsep", "curdir", "pardir", "sep", "extsep", "pathsep", "linesep", - "defpath", "name", "path", "devnull", - "SEEK_SET", "SEEK_CUR", "SEEK_END"] - -def _get_exports_list(module): - try: - return list(module.__all__) - except AttributeError: - return [n for n in dir(module) if n[0] != '_'] - -if 'posix' in _names: - name = 'posix' - linesep = '\n' - from posix import * - try: - from posix import _exit - except ImportError: - pass - import posixpath as path - - import posix - __all__.extend(_get_exports_list(posix)) - del posix - -elif 'nt' in _names: - name = 'nt' - linesep = '\r\n' - from nt import * - try: - from nt import _exit - except ImportError: - pass - import ntpath as path - - import nt - __all__.extend(_get_exports_list(nt)) - del nt - -elif 'os2' in _names: - name = 'os2' - linesep = '\r\n' - from os2 import * - try: - from os2 import _exit - except ImportError: - pass - if sys.version.find('EMX GCC') == -1: - import ntpath as path - else: - import os2emxpath as path - from _emx_link import link - - import os2 - __all__.extend(_get_exports_list(os2)) - del os2 - -elif 'ce' in _names: - name = 'ce' - linesep = '\r\n' - from ce import * - try: - from ce import _exit - except ImportError: - pass - # We can use the standard Windows path. - import ntpath as path - - import ce - __all__.extend(_get_exports_list(ce)) - del ce - -elif 'riscos' in _names: - name = 'riscos' - linesep = '\n' - from riscos import * - try: - from riscos import _exit - except ImportError: - pass - import riscospath as path - - import riscos - __all__.extend(_get_exports_list(riscos)) - del riscos - -else: - raise ImportError, 'no os specific module found' - -sys.modules['os.path'] = path -from os.path import (curdir, pardir, sep, pathsep, defpath, extsep, altsep, - devnull) - -del _names - -# Python uses fixed values for the SEEK_ constants; they are mapped -# to native constants if necessary in posixmodule.c -SEEK_SET = 0 -SEEK_CUR = 1 -SEEK_END = 2 - -#' - -# Super directory utilities. -# (Inspired by Eric Raymond; the doc strings are mostly his) - -def makedirs(name, mode=0777): - """makedirs(path [, mode=0777]) - - Super-mkdir; create a leaf directory and all intermediate ones. - Works like mkdir, except that any intermediate path segment (not - just the rightmost) will be created if it does not exist. This is - recursive. - - """ - head, tail = path.split(name) - if not tail: - head, tail = path.split(head) - if head and tail and not path.exists(head): - try: - makedirs(head, mode) - except OSError, e: - # be happy if someone already created the path - if e.errno != errno.EEXIST: - raise - if tail == curdir: # xxx/newdir/. exists if xxx/newdir exists - return - mkdir(name, mode) - -def removedirs(name): - """removedirs(path) - - Super-rmdir; remove a leaf directory and all empty intermediate - ones. Works like rmdir except that, if the leaf directory is - successfully removed, directories corresponding to rightmost path - segments will be pruned away until either the whole path is - consumed or an error occurs. Errors during this latter phase are - ignored -- they generally mean that a directory was not empty. - - """ - rmdir(name) - head, tail = path.split(name) - if not tail: - head, tail = path.split(head) - while head and tail: - try: - rmdir(head) - except error: - break - head, tail = path.split(head) - -def renames(old, new): - """renames(old, new) - - Super-rename; create directories as necessary and delete any left - empty. Works like rename, except creation of any intermediate - directories needed to make the new pathname good is attempted - first. After the rename, directories corresponding to rightmost - path segments of the old name will be pruned until either the - whole path is consumed or a nonempty directory is found. - - Note: this function can fail with the new directory structure made - if you lack permissions needed to unlink the leaf directory or - file. - - """ - head, tail = path.split(new) - if head and tail and not path.exists(head): - makedirs(head) - rename(old, new) - head, tail = path.split(old) - if head and tail: - try: - removedirs(head) - except error: - pass - -__all__.extend(["makedirs", "removedirs", "renames"]) - -def walk(top, topdown=True, onerror=None, followlinks=False): - """Directory tree generator. - - For each directory in the directory tree rooted at top (including top - itself, but excluding '.' and '..'), yields a 3-tuple - - dirpath, dirnames, filenames - - dirpath is a string, the path to the directory. dirnames is a list of - the names of the subdirectories in dirpath (excluding '.' and '..'). - filenames is a list of the names of the non-directory files in dirpath. - Note that the names in the lists are just names, with no path components. - To get a full path (which begins with top) to a file or directory in - dirpath, do os.path.join(dirpath, name). - - If optional arg 'topdown' is true or not specified, the triple for a - directory is generated before the triples for any of its subdirectories - (directories are generated top down). If topdown is false, the triple - for a directory is generated after the triples for all of its - subdirectories (directories are generated bottom up). - - When topdown is true, the caller can modify the dirnames list in-place - (e.g., via del or slice assignment), and walk will only recurse into the - subdirectories whose names remain in dirnames; this can be used to prune the - search, or to impose a specific order of visiting. Modifying dirnames when - topdown is false is ineffective, since the directories in dirnames have - already been generated by the time dirnames itself is generated. No matter - the value of topdown, the list of subdirectories is retrieved before the - tuples for the directory and its subdirectories are generated. - - By default errors from the os.listdir() call are ignored. If - optional arg 'onerror' is specified, it should be a function; it - will be called with one argument, an os.error instance. It can - report the error to continue with the walk, or raise the exception - to abort the walk. Note that the filename is available as the - filename attribute of the exception object. - - By default, os.walk does not follow symbolic links to subdirectories on - systems that support them. In order to get this functionality, set the - optional argument 'followlinks' to true. - - Caution: if you pass a relative pathname for top, don't change the - current working directory between resumptions of walk. walk never - changes the current directory, and assumes that the client doesn't - either. - - Example: - - import os - from os.path import join, getsize - for root, dirs, files in os.walk('python/Lib/email'): - print root, "consumes", - print sum([getsize(join(root, name)) for name in files]), - print "bytes in", len(files), "non-directory files" - if 'CVS' in dirs: - dirs.remove('CVS') # don't visit CVS directories - - """ - - islink, join, isdir = path.islink, path.join, path.isdir - - # We may not have read permission for top, in which case we can't - # get a list of the files the directory contains. os.path.walk - # always suppressed the exception then, rather than blow up for a - # minor reason when (say) a thousand readable directories are still - # left to visit. That logic is copied here. - try: - # Note that listdir and error are globals in this module due - # to earlier import-*. - names = listdir(top) - except error, err: - if onerror is not None: - onerror(err) - return - - dirs, nondirs = [], [] - for name in names: - if isdir(join(top, name)): - dirs.append(name) - else: - nondirs.append(name) - - if topdown: - yield top, dirs, nondirs - for name in dirs: - new_path = join(top, name) - if followlinks or not islink(new_path): - for x in walk(new_path, topdown, onerror, followlinks): - yield x - if not topdown: - yield top, dirs, nondirs - -__all__.append("walk") - -# Make sure os.environ exists, at least -try: - environ -except NameError: - environ = {} - -def execl(file, *args): - """execl(file, *args) - - Execute the executable file with argument list args, replacing the - current process. """ - execv(file, args) - -def execle(file, *args): - """execle(file, *args, env) - - Execute the executable file with argument list args and - environment env, replacing the current process. """ - env = args[-1] - execve(file, args[:-1], env) - -def execlp(file, *args): - """execlp(file, *args) - - Execute the executable file (which is searched for along $PATH) - with argument list args, replacing the current process. """ - execvp(file, args) - -def execlpe(file, *args): - """execlpe(file, *args, env) - - Execute the executable file (which is searched for along $PATH) - with argument list args and environment env, replacing the current - process. """ - env = args[-1] - execvpe(file, args[:-1], env) - -def execvp(file, args): - """execvp(file, args) - - Execute the executable file (which is searched for along $PATH) - with argument list args, replacing the current process. - args may be a list or tuple of strings. """ - _execvpe(file, args) - -def execvpe(file, args, env): - """execvpe(file, args, env) - - Execute the executable file (which is searched for along $PATH) - with argument list args and environment env , replacing the - current process. - args may be a list or tuple of strings. """ - _execvpe(file, args, env) - -__all__.extend(["execl","execle","execlp","execlpe","execvp","execvpe"]) - -def _execvpe(file, args, env=None): - if env is not None: - func = execve - argrest = (args, env) - else: - func = execv - argrest = (args,) - env = environ - - head, tail = path.split(file) - if head: - func(file, *argrest) - return - if 'PATH' in env: - envpath = env['PATH'] - else: - envpath = defpath - PATH = envpath.split(pathsep) - saved_exc = None - saved_tb = None - for dir in PATH: - fullname = path.join(dir, file) - try: - func(fullname, *argrest) - except error, e: - tb = sys.exc_info()[2] - if (e.errno != errno.ENOENT and e.errno != errno.ENOTDIR - and saved_exc is None): - saved_exc = e - saved_tb = tb - if saved_exc: - raise error, saved_exc, saved_tb - raise error, e, tb - -# Change environ to automatically call putenv() if it exists -try: - # This will fail if there's no putenv - putenv -except NameError: - pass -else: - import UserDict - - # Fake unsetenv() for Windows - # not sure about os2 here but - # I'm guessing they are the same. - - if name in ('os2', 'nt'): - def unsetenv(key): - putenv(key, "") - - if name == "riscos": - # On RISC OS, all env access goes through getenv and putenv - from riscosenviron import _Environ - elif name in ('os2', 'nt'): # Where Env Var Names Must Be UPPERCASE - # But we store them as upper case - class _Environ(UserDict.IterableUserDict): - def __init__(self, environ): - UserDict.UserDict.__init__(self) - data = self.data - for k, v in environ.items(): - data[k.upper()] = v - def __setitem__(self, key, item): - putenv(key, item) - self.data[key.upper()] = item - def __getitem__(self, key): - return self.data[key.upper()] - try: - unsetenv - except NameError: - def __delitem__(self, key): - del self.data[key.upper()] - else: - def __delitem__(self, key): - unsetenv(key) - del self.data[key.upper()] - def clear(self): - for key in self.data.keys(): - unsetenv(key) - del self.data[key] - def pop(self, key, *args): - unsetenv(key) - return self.data.pop(key.upper(), *args) - def has_key(self, key): - return key.upper() in self.data - def __contains__(self, key): - return key.upper() in self.data - def get(self, key, failobj=None): - return self.data.get(key.upper(), failobj) - def update(self, dict=None, **kwargs): - if dict: - try: - keys = dict.keys() - except AttributeError: - # List of (key, value) - for k, v in dict: - self[k] = v - else: - # got keys - # cannot use items(), since mappings - # may not have them. - for k in keys: - self[k] = dict[k] - if kwargs: - self.update(kwargs) - def copy(self): - return dict(self) - - else: # Where Env Var Names Can Be Mixed Case - class _Environ(UserDict.IterableUserDict): - def __init__(self, environ): - UserDict.UserDict.__init__(self) - self.data = environ - def __setitem__(self, key, item): - putenv(key, item) - self.data[key] = item - def update(self, dict=None, **kwargs): - if dict: - try: - keys = dict.keys() - except AttributeError: - # List of (key, value) - for k, v in dict: - self[k] = v - else: - # got keys - # cannot use items(), since mappings - # may not have them. - for k in keys: - self[k] = dict[k] - if kwargs: - self.update(kwargs) - try: - unsetenv - except NameError: - pass - else: - def __delitem__(self, key): - unsetenv(key) - del self.data[key] - def clear(self): - for key in self.data.keys(): - unsetenv(key) - del self.data[key] - def pop(self, key, *args): - unsetenv(key) - return self.data.pop(key, *args) - def copy(self): - return dict(self) - - - environ = _Environ(environ) - -def getenv(key, default=None): - """Get an environment variable, return None if it doesn't exist. - The optional second argument can specify an alternate default.""" - return environ.get(key, default) -__all__.append("getenv") - -def _exists(name): - return name in globals() - -# Supply spawn*() (probably only for Unix) -if _exists("fork") and not _exists("spawnv") and _exists("execv"): - - P_WAIT = 0 - P_NOWAIT = P_NOWAITO = 1 - - # XXX Should we support P_DETACH? I suppose it could fork()**2 - # and close the std I/O streams. Also, P_OVERLAY is the same - # as execv*()? - - def _spawnvef(mode, file, args, env, func): - # Internal helper; func is the exec*() function to use - pid = fork() - if not pid: - # Child - try: - if env is None: - func(file, args) - else: - func(file, args, env) - except: - _exit(127) - else: - # Parent - if mode == P_NOWAIT: - return pid # Caller is responsible for waiting! - while 1: - wpid, sts = waitpid(pid, 0) - if WIFSTOPPED(sts): - continue - elif WIFSIGNALED(sts): - return -WTERMSIG(sts) - elif WIFEXITED(sts): - return WEXITSTATUS(sts) - else: - raise error, "Not stopped, signaled or exited???" - - def spawnv(mode, file, args): - """spawnv(mode, file, args) -> integer - -Execute file with arguments from args in a subprocess. -If mode == P_NOWAIT return the pid of the process. -If mode == P_WAIT return the process's exit code if it exits normally; -otherwise return -SIG, where SIG is the signal that killed it. """ - return _spawnvef(mode, file, args, None, execv) - - def spawnve(mode, file, args, env): - """spawnve(mode, file, args, env) -> integer - -Execute file with arguments from args in a subprocess with the -specified environment. -If mode == P_NOWAIT return the pid of the process. -If mode == P_WAIT return the process's exit code if it exits normally; -otherwise return -SIG, where SIG is the signal that killed it. """ - return _spawnvef(mode, file, args, env, execve) - - # Note: spawnvp[e] is't currently supported on Windows - - def spawnvp(mode, file, args): - """spawnvp(mode, file, args) -> integer - -Execute file (which is looked for along $PATH) with arguments from -args in a subprocess. -If mode == P_NOWAIT return the pid of the process. -If mode == P_WAIT return the process's exit code if it exits normally; -otherwise return -SIG, where SIG is the signal that killed it. """ - return _spawnvef(mode, file, args, None, execvp) - - def spawnvpe(mode, file, args, env): - """spawnvpe(mode, file, args, env) -> integer - -Execute file (which is looked for along $PATH) with arguments from -args in a subprocess with the supplied environment. -If mode == P_NOWAIT return the pid of the process. -If mode == P_WAIT return the process's exit code if it exits normally; -otherwise return -SIG, where SIG is the signal that killed it. """ - return _spawnvef(mode, file, args, env, execvpe) - -if _exists("spawnv"): - # These aren't supplied by the basic Windows code - # but can be easily implemented in Python - - def spawnl(mode, file, *args): - """spawnl(mode, file, *args) -> integer - -Execute file with arguments from args in a subprocess. -If mode == P_NOWAIT return the pid of the process. -If mode == P_WAIT return the process's exit code if it exits normally; -otherwise return -SIG, where SIG is the signal that killed it. """ - return spawnv(mode, file, args) - - def spawnle(mode, file, *args): - """spawnle(mode, file, *args, env) -> integer - -Execute file with arguments from args in a subprocess with the -supplied environment. -If mode == P_NOWAIT return the pid of the process. -If mode == P_WAIT return the process's exit code if it exits normally; -otherwise return -SIG, where SIG is the signal that killed it. """ - env = args[-1] - return spawnve(mode, file, args[:-1], env) - - - __all__.extend(["spawnv", "spawnve", "spawnl", "spawnle",]) - - -if _exists("spawnvp"): - # At the moment, Windows doesn't implement spawnvp[e], - # so it won't have spawnlp[e] either. - def spawnlp(mode, file, *args): - """spawnlp(mode, file, *args) -> integer - -Execute file (which is looked for along $PATH) with arguments from -args in a subprocess with the supplied environment. -If mode == P_NOWAIT return the pid of the process. -If mode == P_WAIT return the process's exit code if it exits normally; -otherwise return -SIG, where SIG is the signal that killed it. """ - return spawnvp(mode, file, args) - - def spawnlpe(mode, file, *args): - """spawnlpe(mode, file, *args, env) -> integer - -Execute file (which is looked for along $PATH) with arguments from -args in a subprocess with the supplied environment. -If mode == P_NOWAIT return the pid of the process. -If mode == P_WAIT return the process's exit code if it exits normally; -otherwise return -SIG, where SIG is the signal that killed it. """ - env = args[-1] - return spawnvpe(mode, file, args[:-1], env) - - - __all__.extend(["spawnvp", "spawnvpe", "spawnlp", "spawnlpe",]) - - -# Supply popen2 etc. (for Unix) -if _exists("fork"): - if not _exists("popen2"): - def popen2(cmd, mode="t", bufsize=-1): - """Execute the shell command 'cmd' in a sub-process. On UNIX, 'cmd' - may be a sequence, in which case arguments will be passed directly to - the program without shell intervention (as with os.spawnv()). If 'cmd' - is a string it will be passed to the shell (as with os.system()). If - 'bufsize' is specified, it sets the buffer size for the I/O pipes. The - file objects (child_stdin, child_stdout) are returned.""" - import warnings - msg = "os.popen2 is deprecated. Use the subprocess module." - warnings.warn(msg, DeprecationWarning, stacklevel=2) - - import subprocess - PIPE = subprocess.PIPE - p = subprocess.Popen(cmd, shell=isinstance(cmd, basestring), - bufsize=bufsize, stdin=PIPE, stdout=PIPE, - close_fds=True) - return p.stdin, p.stdout - __all__.append("popen2") - - if not _exists("popen3"): - def popen3(cmd, mode="t", bufsize=-1): - """Execute the shell command 'cmd' in a sub-process. On UNIX, 'cmd' - may be a sequence, in which case arguments will be passed directly to - the program without shell intervention (as with os.spawnv()). If 'cmd' - is a string it will be passed to the shell (as with os.system()). If - 'bufsize' is specified, it sets the buffer size for the I/O pipes. The - file objects (child_stdin, child_stdout, child_stderr) are returned.""" - import warnings - msg = "os.popen3 is deprecated. Use the subprocess module." - warnings.warn(msg, DeprecationWarning, stacklevel=2) - - import subprocess - PIPE = subprocess.PIPE - p = subprocess.Popen(cmd, shell=isinstance(cmd, basestring), - bufsize=bufsize, stdin=PIPE, stdout=PIPE, - stderr=PIPE, close_fds=True) - return p.stdin, p.stdout, p.stderr - __all__.append("popen3") - - if not _exists("popen4"): - def popen4(cmd, mode="t", bufsize=-1): - """Execute the shell command 'cmd' in a sub-process. On UNIX, 'cmd' - may be a sequence, in which case arguments will be passed directly to - the program without shell intervention (as with os.spawnv()). If 'cmd' - is a string it will be passed to the shell (as with os.system()). If - 'bufsize' is specified, it sets the buffer size for the I/O pipes. The - file objects (child_stdin, child_stdout_stderr) are returned.""" - import warnings - msg = "os.popen4 is deprecated. Use the subprocess module." - warnings.warn(msg, DeprecationWarning, stacklevel=2) - - import subprocess - PIPE = subprocess.PIPE - p = subprocess.Popen(cmd, shell=isinstance(cmd, basestring), - bufsize=bufsize, stdin=PIPE, stdout=PIPE, - stderr=subprocess.STDOUT, close_fds=True) - return p.stdin, p.stdout - __all__.append("popen4") - -import copy_reg as _copy_reg - -def _make_stat_result(tup, dict): - return stat_result(tup, dict) - -def _pickle_stat_result(sr): - (type, args) = sr.__reduce__() - return (_make_stat_result, args) - -try: - _copy_reg.pickle(stat_result, _pickle_stat_result, _make_stat_result) -except NameError: # stat_result may not exist - pass - -def _make_statvfs_result(tup, dict): - return statvfs_result(tup, dict) - -def _pickle_statvfs_result(sr): - (type, args) = sr.__reduce__() - return (_make_statvfs_result, args) - -try: - _copy_reg.pickle(statvfs_result, _pickle_statvfs_result, - _make_statvfs_result) -except NameError: # statvfs_result may not exist - pass
--- a/test/lib/python2.7/posixpath.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,439 +0,0 @@ -"""Common operations on Posix pathnames. - -Instead of importing this module directly, import os and refer to -this module as os.path. The "os.path" name is an alias for this -module on Posix systems; on other systems (e.g. Mac, Windows), -os.path provides the same operations in a manner specific to that -platform, and is an alias to another module (e.g. macpath, ntpath). - -Some of this can actually be useful on non-Posix systems too, e.g. -for manipulation of the pathname component of URLs. -""" - -import os -import sys -import stat -import genericpath -import warnings -from genericpath import * -from genericpath import _unicode - -__all__ = ["normcase","isabs","join","splitdrive","split","splitext", - "basename","dirname","commonprefix","getsize","getmtime", - "getatime","getctime","islink","exists","lexists","isdir","isfile", - "ismount","walk","expanduser","expandvars","normpath","abspath", - "samefile","sameopenfile","samestat", - "curdir","pardir","sep","pathsep","defpath","altsep","extsep", - "devnull","realpath","supports_unicode_filenames","relpath"] - -# strings representing various path-related bits and pieces -curdir = '.' -pardir = '..' -extsep = '.' -sep = '/' -pathsep = ':' -defpath = ':/bin:/usr/bin' -altsep = None -devnull = '/dev/null' - -# Normalize the case of a pathname. Trivial in Posix, string.lower on Mac. -# On MS-DOS this may also turn slashes into backslashes; however, other -# normalizations (such as optimizing '../' away) are not allowed -# (another function should be defined to do that). - -def normcase(s): - """Normalize case of pathname. Has no effect under Posix""" - return s - - -# Return whether a path is absolute. -# Trivial in Posix, harder on the Mac or MS-DOS. - -def isabs(s): - """Test whether a path is absolute""" - return s.startswith('/') - - -# Join pathnames. -# Ignore the previous parts if a part is absolute. -# Insert a '/' unless the first part is empty or already ends in '/'. - -def join(a, *p): - """Join two or more pathname components, inserting '/' as needed. - If any component is an absolute path, all previous path components - will be discarded. An empty last part will result in a path that - ends with a separator.""" - path = a - for b in p: - if b.startswith('/'): - path = b - elif path == '' or path.endswith('/'): - path += b - else: - path += '/' + b - return path - - -# Split a path in head (everything up to the last '/') and tail (the -# rest). If the path ends in '/', tail will be empty. If there is no -# '/' in the path, head will be empty. -# Trailing '/'es are stripped from head unless it is the root. - -def split(p): - """Split a pathname. Returns tuple "(head, tail)" where "tail" is - everything after the final slash. Either part may be empty.""" - i = p.rfind('/') + 1 - head, tail = p[:i], p[i:] - if head and head != '/'*len(head): - head = head.rstrip('/') - return head, tail - - -# Split a path in root and extension. -# The extension is everything starting at the last dot in the last -# pathname component; the root is everything before that. -# It is always true that root + ext == p. - -def splitext(p): - return genericpath._splitext(p, sep, altsep, extsep) -splitext.__doc__ = genericpath._splitext.__doc__ - -# Split a pathname into a drive specification and the rest of the -# path. Useful on DOS/Windows/NT; on Unix, the drive is always empty. - -def splitdrive(p): - """Split a pathname into drive and path. On Posix, drive is always - empty.""" - return '', p - - -# Return the tail (basename) part of a path, same as split(path)[1]. - -def basename(p): - """Returns the final component of a pathname""" - i = p.rfind('/') + 1 - return p[i:] - - -# Return the head (dirname) part of a path, same as split(path)[0]. - -def dirname(p): - """Returns the directory component of a pathname""" - i = p.rfind('/') + 1 - head = p[:i] - if head and head != '/'*len(head): - head = head.rstrip('/') - return head - - -# Is a path a symbolic link? -# This will always return false on systems where os.lstat doesn't exist. - -def islink(path): - """Test whether a path is a symbolic link""" - try: - st = os.lstat(path) - except (os.error, AttributeError): - return False - return stat.S_ISLNK(st.st_mode) - -# Being true for dangling symbolic links is also useful. - -def lexists(path): - """Test whether a path exists. Returns True for broken symbolic links""" - try: - os.lstat(path) - except os.error: - return False - return True - - -# Are two filenames really pointing to the same file? - -def samefile(f1, f2): - """Test whether two pathnames reference the same actual file""" - s1 = os.stat(f1) - s2 = os.stat(f2) - return samestat(s1, s2) - - -# Are two open files really referencing the same file? -# (Not necessarily the same file descriptor!) - -def sameopenfile(fp1, fp2): - """Test whether two open file objects reference the same file""" - s1 = os.fstat(fp1) - s2 = os.fstat(fp2) - return samestat(s1, s2) - - -# Are two stat buffers (obtained from stat, fstat or lstat) -# describing the same file? - -def samestat(s1, s2): - """Test whether two stat buffers reference the same file""" - return s1.st_ino == s2.st_ino and \ - s1.st_dev == s2.st_dev - - -# Is a path a mount point? -# (Does this work for all UNIXes? Is it even guaranteed to work by Posix?) - -def ismount(path): - """Test whether a path is a mount point""" - if islink(path): - # A symlink can never be a mount point - return False - try: - s1 = os.lstat(path) - s2 = os.lstat(join(path, '..')) - except os.error: - return False # It doesn't exist -- so not a mount point :-) - dev1 = s1.st_dev - dev2 = s2.st_dev - if dev1 != dev2: - return True # path/.. on a different device as path - ino1 = s1.st_ino - ino2 = s2.st_ino - if ino1 == ino2: - return True # path/.. is the same i-node as path - return False - - -# Directory tree walk. -# For each directory under top (including top itself, but excluding -# '.' and '..'), func(arg, dirname, filenames) is called, where -# dirname is the name of the directory and filenames is the list -# of files (and subdirectories etc.) in the directory. -# The func may modify the filenames list, to implement a filter, -# or to impose a different order of visiting. - -def walk(top, func, arg): - """Directory tree walk with callback function. - - For each directory in the directory tree rooted at top (including top - itself, but excluding '.' and '..'), call func(arg, dirname, fnames). - dirname is the name of the directory, and fnames a list of the names of - the files and subdirectories in dirname (excluding '.' and '..'). func - may modify the fnames list in-place (e.g. via del or slice assignment), - and walk will only recurse into the subdirectories whose names remain in - fnames; this can be used to implement a filter, or to impose a specific - order of visiting. No semantics are defined for, or required of, arg, - beyond that arg is always passed to func. It can be used, e.g., to pass - a filename pattern, or a mutable object designed to accumulate - statistics. Passing None for arg is common.""" - warnings.warnpy3k("In 3.x, os.path.walk is removed in favor of os.walk.", - stacklevel=2) - try: - names = os.listdir(top) - except os.error: - return - func(arg, top, names) - for name in names: - name = join(top, name) - try: - st = os.lstat(name) - except os.error: - continue - if stat.S_ISDIR(st.st_mode): - walk(name, func, arg) - - -# Expand paths beginning with '~' or '~user'. -# '~' means $HOME; '~user' means that user's home directory. -# If the path doesn't begin with '~', or if the user or $HOME is unknown, -# the path is returned unchanged (leaving error reporting to whatever -# function is called with the expanded path as argument). -# See also module 'glob' for expansion of *, ? and [...] in pathnames. -# (A function should also be defined to do full *sh-style environment -# variable expansion.) - -def expanduser(path): - """Expand ~ and ~user constructions. If user or $HOME is unknown, - do nothing.""" - if not path.startswith('~'): - return path - i = path.find('/', 1) - if i < 0: - i = len(path) - if i == 1: - if 'HOME' not in os.environ: - import pwd - userhome = pwd.getpwuid(os.getuid()).pw_dir - else: - userhome = os.environ['HOME'] - else: - import pwd - try: - pwent = pwd.getpwnam(path[1:i]) - except KeyError: - return path - userhome = pwent.pw_dir - userhome = userhome.rstrip('/') - return (userhome + path[i:]) or '/' - - -# Expand paths containing shell variable substitutions. -# This expands the forms $variable and ${variable} only. -# Non-existent variables are left unchanged. - -_varprog = None -_uvarprog = None - -def expandvars(path): - """Expand shell variables of form $var and ${var}. Unknown variables - are left unchanged.""" - global _varprog, _uvarprog - if '$' not in path: - return path - if isinstance(path, _unicode): - if not _uvarprog: - import re - _uvarprog = re.compile(ur'\$(\w+|\{[^}]*\})', re.UNICODE) - varprog = _uvarprog - encoding = sys.getfilesystemencoding() - else: - if not _varprog: - import re - _varprog = re.compile(r'\$(\w+|\{[^}]*\})') - varprog = _varprog - encoding = None - i = 0 - while True: - m = varprog.search(path, i) - if not m: - break - i, j = m.span(0) - name = m.group(1) - if name.startswith('{') and name.endswith('}'): - name = name[1:-1] - if encoding: - name = name.encode(encoding) - if name in os.environ: - tail = path[j:] - value = os.environ[name] - if encoding: - value = value.decode(encoding) - path = path[:i] + value - i = len(path) - path += tail - else: - i = j - return path - - -# Normalize a path, e.g. A//B, A/./B and A/foo/../B all become A/B. -# It should be understood that this may change the meaning of the path -# if it contains symbolic links! - -def normpath(path): - """Normalize path, eliminating double slashes, etc.""" - # Preserve unicode (if path is unicode) - slash, dot = (u'/', u'.') if isinstance(path, _unicode) else ('/', '.') - if path == '': - return dot - initial_slashes = path.startswith('/') - # POSIX allows one or two initial slashes, but treats three or more - # as single slash. - if (initial_slashes and - path.startswith('//') and not path.startswith('///')): - initial_slashes = 2 - comps = path.split('/') - new_comps = [] - for comp in comps: - if comp in ('', '.'): - continue - if (comp != '..' or (not initial_slashes and not new_comps) or - (new_comps and new_comps[-1] == '..')): - new_comps.append(comp) - elif new_comps: - new_comps.pop() - comps = new_comps - path = slash.join(comps) - if initial_slashes: - path = slash*initial_slashes + path - return path or dot - - -def abspath(path): - """Return an absolute path.""" - if not isabs(path): - if isinstance(path, _unicode): - cwd = os.getcwdu() - else: - cwd = os.getcwd() - path = join(cwd, path) - return normpath(path) - - -# Return a canonical path (i.e. the absolute location of a file on the -# filesystem). - -def realpath(filename): - """Return the canonical path of the specified filename, eliminating any -symbolic links encountered in the path.""" - path, ok = _joinrealpath('', filename, {}) - return abspath(path) - -# Join two paths, normalizing and eliminating any symbolic links -# encountered in the second path. -def _joinrealpath(path, rest, seen): - if isabs(rest): - rest = rest[1:] - path = sep - - while rest: - name, _, rest = rest.partition(sep) - if not name or name == curdir: - # current dir - continue - if name == pardir: - # parent dir - if path: - path, name = split(path) - if name == pardir: - path = join(path, pardir, pardir) - else: - path = pardir - continue - newpath = join(path, name) - if not islink(newpath): - path = newpath - continue - # Resolve the symbolic link - if newpath in seen: - # Already seen this path - path = seen[newpath] - if path is not None: - # use cached value - continue - # The symlink is not resolved, so we must have a symlink loop. - # Return already resolved part + rest of the path unchanged. - return join(newpath, rest), False - seen[newpath] = None # not resolved symlink - path, ok = _joinrealpath(path, os.readlink(newpath), seen) - if not ok: - return join(path, rest), False - seen[newpath] = path # resolved symlink - - return path, True - - -supports_unicode_filenames = (sys.platform == 'darwin') - -def relpath(path, start=curdir): - """Return a relative version of a path""" - - if not path: - raise ValueError("no path specified") - - start_list = [x for x in abspath(start).split(sep) if x] - path_list = [x for x in abspath(path).split(sep) if x] - - # Work out how much of the filepath is shared by start and path. - i = len(commonprefix([start_list, path_list])) - - rel_list = [pardir] * (len(start_list)-i) + path_list[i:] - if not rel_list: - return curdir - return join(*rel_list)
--- a/test/lib/python2.7/re.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,340 +0,0 @@ -# -# Secret Labs' Regular Expression Engine -# -# re-compatible interface for the sre matching engine -# -# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. -# -# This version of the SRE library can be redistributed under CNRI's -# Python 1.6 license. For any other use, please contact Secret Labs -# AB (info@pythonware.com). -# -# Portions of this engine have been developed in cooperation with -# CNRI. Hewlett-Packard provided funding for 1.6 integration and -# other compatibility work. -# - -r"""Support for regular expressions (RE). - -This module provides regular expression matching operations similar to -those found in Perl. It supports both 8-bit and Unicode strings; both -the pattern and the strings being processed can contain null bytes and -characters outside the US ASCII range. - -Regular expressions can contain both special and ordinary characters. -Most ordinary characters, like "A", "a", or "0", are the simplest -regular expressions; they simply match themselves. You can -concatenate ordinary characters, so last matches the string 'last'. - -The special characters are: - "." Matches any character except a newline. - "^" Matches the start of the string. - "$" Matches the end of the string or just before the newline at - the end of the string. - "*" Matches 0 or more (greedy) repetitions of the preceding RE. - Greedy means that it will match as many repetitions as possible. - "+" Matches 1 or more (greedy) repetitions of the preceding RE. - "?" Matches 0 or 1 (greedy) of the preceding RE. - *?,+?,?? Non-greedy versions of the previous three special characters. - {m,n} Matches from m to n repetitions of the preceding RE. - {m,n}? Non-greedy version of the above. - "\\" Either escapes special characters or signals a special sequence. - [] Indicates a set of characters. - A "^" as the first character indicates a complementing set. - "|" A|B, creates an RE that will match either A or B. - (...) Matches the RE inside the parentheses. - The contents can be retrieved or matched later in the string. - (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below). - (?:...) Non-grouping version of regular parentheses. - (?P<name>...) The substring matched by the group is accessible by name. - (?P=name) Matches the text matched earlier by the group named name. - (?#...) A comment; ignored. - (?=...) Matches if ... matches next, but doesn't consume the string. - (?!...) Matches if ... doesn't match next. - (?<=...) Matches if preceded by ... (must be fixed length). - (?<!...) Matches if not preceded by ... (must be fixed length). - (?(id/name)yes|no) Matches yes pattern if the group with id/name matched, - the (optional) no pattern otherwise. - -The special sequences consist of "\\" and a character from the list -below. If the ordinary character is not on the list, then the -resulting RE will match the second character. - \number Matches the contents of the group of the same number. - \A Matches only at the start of the string. - \Z Matches only at the end of the string. - \b Matches the empty string, but only at the start or end of a word. - \B Matches the empty string, but not at the start or end of a word. - \d Matches any decimal digit; equivalent to the set [0-9]. - \D Matches any non-digit character; equivalent to the set [^0-9]. - \s Matches any whitespace character; equivalent to [ \t\n\r\f\v]. - \S Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v]. - \w Matches any alphanumeric character; equivalent to [a-zA-Z0-9_]. - With LOCALE, it will match the set [0-9_] plus characters defined - as letters for the current locale. - \W Matches the complement of \w. - \\ Matches a literal backslash. - -This module exports the following functions: - match Match a regular expression pattern to the beginning of a string. - search Search a string for the presence of a pattern. - sub Substitute occurrences of a pattern found in a string. - subn Same as sub, but also return the number of substitutions made. - split Split a string by the occurrences of a pattern. - findall Find all occurrences of a pattern in a string. - finditer Return an iterator yielding a match object for each match. - compile Compile a pattern into a RegexObject. - purge Clear the regular expression cache. - escape Backslash all non-alphanumerics in a string. - -Some of the functions in this module takes flags as optional parameters: - I IGNORECASE Perform case-insensitive matching. - L LOCALE Make \w, \W, \b, \B, dependent on the current locale. - M MULTILINE "^" matches the beginning of lines (after a newline) - as well as the string. - "$" matches the end of lines (before a newline) as well - as the end of the string. - S DOTALL "." matches any character at all, including the newline. - X VERBOSE Ignore whitespace and comments for nicer looking RE's. - U UNICODE Make \w, \W, \b, \B, dependent on the Unicode locale. - -This module also defines an exception 'error'. - -""" - -import sys -import sre_compile -import sre_parse -try: - import _locale -except ImportError: - _locale = None - -# public symbols -__all__ = [ "match", "search", "sub", "subn", "split", "findall", - "compile", "purge", "template", "escape", "I", "L", "M", "S", "X", - "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", - "UNICODE", "error" ] - -__version__ = "2.2.1" - -# flags -I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case -L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale -U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale -M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline -S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline -X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments - -# sre extensions (experimental, don't rely on these) -T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking -DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation - -# sre exception -error = sre_compile.error - -# -------------------------------------------------------------------- -# public interface - -def match(pattern, string, flags=0): - """Try to apply the pattern at the start of the string, returning - a match object, or None if no match was found.""" - return _compile(pattern, flags).match(string) - -def search(pattern, string, flags=0): - """Scan through string looking for a match to the pattern, returning - a match object, or None if no match was found.""" - return _compile(pattern, flags).search(string) - -def sub(pattern, repl, string, count=0, flags=0): - """Return the string obtained by replacing the leftmost - non-overlapping occurrences of the pattern in string by the - replacement repl. repl can be either a string or a callable; - if a string, backslash escapes in it are processed. If it is - a callable, it's passed the match object and must return - a replacement string to be used.""" - return _compile(pattern, flags).sub(repl, string, count) - -def subn(pattern, repl, string, count=0, flags=0): - """Return a 2-tuple containing (new_string, number). - new_string is the string obtained by replacing the leftmost - non-overlapping occurrences of the pattern in the source - string by the replacement repl. number is the number of - substitutions that were made. repl can be either a string or a - callable; if a string, backslash escapes in it are processed. - If it is a callable, it's passed the match object and must - return a replacement string to be used.""" - return _compile(pattern, flags).subn(repl, string, count) - -def split(pattern, string, maxsplit=0, flags=0): - """Split the source string by the occurrences of the pattern, - returning a list containing the resulting substrings.""" - return _compile(pattern, flags).split(string, maxsplit) - -def findall(pattern, string, flags=0): - """Return a list of all non-overlapping matches in the string. - - If one or more groups are present in the pattern, return a - list of groups; this will be a list of tuples if the pattern - has more than one group. - - Empty matches are included in the result.""" - return _compile(pattern, flags).findall(string) - -if sys.hexversion >= 0x02020000: - __all__.append("finditer") - def finditer(pattern, string, flags=0): - """Return an iterator over all non-overlapping matches in the - string. For each match, the iterator returns a match object. - - Empty matches are included in the result.""" - return _compile(pattern, flags).finditer(string) - -def compile(pattern, flags=0): - "Compile a regular expression pattern, returning a pattern object." - return _compile(pattern, flags) - -def purge(): - "Clear the regular expression cache" - _cache.clear() - _cache_repl.clear() - -def template(pattern, flags=0): - "Compile a template pattern, returning a pattern object" - return _compile(pattern, flags|T) - -_alphanum = frozenset( - "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789") - -def escape(pattern): - "Escape all non-alphanumeric characters in pattern." - s = list(pattern) - alphanum = _alphanum - for i, c in enumerate(pattern): - if c not in alphanum: - if c == "\000": - s[i] = "\\000" - else: - s[i] = "\\" + c - return pattern[:0].join(s) - -# -------------------------------------------------------------------- -# internals - -_cache = {} -_cache_repl = {} - -_pattern_type = type(sre_compile.compile("", 0)) - -_MAXCACHE = 100 - -def _compile(*key): - # internal: compile pattern - pattern, flags = key - bypass_cache = flags & DEBUG - if not bypass_cache: - cachekey = (type(key[0]),) + key - try: - p, loc = _cache[cachekey] - if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE): - return p - except KeyError: - pass - if isinstance(pattern, _pattern_type): - if flags: - raise ValueError('Cannot process flags argument with a compiled pattern') - return pattern - if not sre_compile.isstring(pattern): - raise TypeError, "first argument must be string or compiled pattern" - try: - p = sre_compile.compile(pattern, flags) - except error, v: - raise error, v # invalid expression - if not bypass_cache: - if len(_cache) >= _MAXCACHE: - _cache.clear() - if p.flags & LOCALE: - if not _locale: - return p - loc = _locale.setlocale(_locale.LC_CTYPE) - else: - loc = None - _cache[cachekey] = p, loc - return p - -def _compile_repl(*key): - # internal: compile replacement pattern - p = _cache_repl.get(key) - if p is not None: - return p - repl, pattern = key - try: - p = sre_parse.parse_template(repl, pattern) - except error, v: - raise error, v # invalid expression - if len(_cache_repl) >= _MAXCACHE: - _cache_repl.clear() - _cache_repl[key] = p - return p - -def _expand(pattern, match, template): - # internal: match.expand implementation hook - template = sre_parse.parse_template(template, pattern) - return sre_parse.expand_template(template, match) - -def _subx(pattern, template): - # internal: pattern.sub/subn implementation helper - template = _compile_repl(template, pattern) - if not template[0] and len(template[1]) == 1: - # literal replacement - return template[1][0] - def filter(match, template=template): - return sre_parse.expand_template(template, match) - return filter - -# register myself for pickling - -import copy_reg - -def _pickle(p): - return _compile, (p.pattern, p.flags) - -copy_reg.pickle(_pattern_type, _pickle, _compile) - -# -------------------------------------------------------------------- -# experimental stuff (see python-dev discussions for details) - -class Scanner: - def __init__(self, lexicon, flags=0): - from sre_constants import BRANCH, SUBPATTERN - self.lexicon = lexicon - # combine phrases into a compound pattern - p = [] - s = sre_parse.Pattern() - s.flags = flags - for phrase, action in lexicon: - p.append(sre_parse.SubPattern(s, [ - (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))), - ])) - s.groups = len(p)+1 - p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) - self.scanner = sre_compile.compile(p) - def scan(self, string): - result = [] - append = result.append - match = self.scanner.scanner(string).match - i = 0 - while 1: - m = match() - if not m: - break - j = m.end() - if i == j: - break - action = self.lexicon[m.lastindex-1][1] - if hasattr(action, '__call__'): - self.match = m - action = action(self, m.group()) - if action is not None: - append(action) - i = j - return result, string[i:]
--- a/test/lib/python2.7/site.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,758 +0,0 @@ -"""Append module search paths for third-party packages to sys.path. - -**************************************************************** -* This module is automatically imported during initialization. * -**************************************************************** - -In earlier versions of Python (up to 1.5a3), scripts or modules that -needed to use site-specific modules would place ``import site'' -somewhere near the top of their code. Because of the automatic -import, this is no longer necessary (but code that does it still -works). - -This will append site-specific paths to the module search path. On -Unix, it starts with sys.prefix and sys.exec_prefix (if different) and -appends lib/python<version>/site-packages as well as lib/site-python. -It also supports the Debian convention of -lib/python<version>/dist-packages. On other platforms (mainly Mac and -Windows), it uses just sys.prefix (and sys.exec_prefix, if different, -but this is unlikely). The resulting directories, if they exist, are -appended to sys.path, and also inspected for path configuration files. - -FOR DEBIAN, this sys.path is augmented with directories in /usr/local. -Local addons go into /usr/local/lib/python<version>/site-packages -(resp. /usr/local/lib/site-python), Debian addons install into -/usr/{lib,share}/python<version>/dist-packages. - -A path configuration file is a file whose name has the form -<package>.pth; its contents are additional directories (one per line) -to be added to sys.path. Non-existing directories (or -non-directories) are never added to sys.path; no directory is added to -sys.path more than once. Blank lines and lines beginning with -'#' are skipped. Lines starting with 'import' are executed. - -For example, suppose sys.prefix and sys.exec_prefix are set to -/usr/local and there is a directory /usr/local/lib/python2.X/site-packages -with three subdirectories, foo, bar and spam, and two path -configuration files, foo.pth and bar.pth. Assume foo.pth contains the -following: - - # foo package configuration - foo - bar - bletch - -and bar.pth contains: - - # bar package configuration - bar - -Then the following directories are added to sys.path, in this order: - - /usr/local/lib/python2.X/site-packages/bar - /usr/local/lib/python2.X/site-packages/foo - -Note that bletch is omitted because it doesn't exist; bar precedes foo -because bar.pth comes alphabetically before foo.pth; and spam is -omitted because it is not mentioned in either path configuration file. - -After these path manipulations, an attempt is made to import a module -named sitecustomize, which can perform arbitrary additional -site-specific customizations. If this import fails with an -ImportError exception, it is silently ignored. - -""" - -import sys -import os -try: - import __builtin__ as builtins -except ImportError: - import builtins -try: - set -except NameError: - from sets import Set as set - -# Prefixes for site-packages; add additional prefixes like /usr/local here -PREFIXES = [sys.prefix, sys.exec_prefix] -# Enable per user site-packages directory -# set it to False to disable the feature or True to force the feature -ENABLE_USER_SITE = None -# for distutils.commands.install -USER_SITE = None -USER_BASE = None - -_is_64bit = (getattr(sys, 'maxsize', None) or getattr(sys, 'maxint')) > 2**32 -_is_pypy = hasattr(sys, 'pypy_version_info') -_is_jython = sys.platform[:4] == 'java' -if _is_jython: - ModuleType = type(os) - -def makepath(*paths): - dir = os.path.join(*paths) - if _is_jython and (dir == '__classpath__' or - dir.startswith('__pyclasspath__')): - return dir, dir - dir = os.path.abspath(dir) - return dir, os.path.normcase(dir) - -def abs__file__(): - """Set all module' __file__ attribute to an absolute path""" - for m in sys.modules.values(): - if ((_is_jython and not isinstance(m, ModuleType)) or - hasattr(m, '__loader__')): - # only modules need the abspath in Jython. and don't mess - # with a PEP 302-supplied __file__ - continue - f = getattr(m, '__file__', None) - if f is None: - continue - m.__file__ = os.path.abspath(f) - -def removeduppaths(): - """ Remove duplicate entries from sys.path along with making them - absolute""" - # This ensures that the initial path provided by the interpreter contains - # only absolute pathnames, even if we're running from the build directory. - L = [] - known_paths = set() - for dir in sys.path: - # Filter out duplicate paths (on case-insensitive file systems also - # if they only differ in case); turn relative paths into absolute - # paths. - dir, dircase = makepath(dir) - if not dircase in known_paths: - L.append(dir) - known_paths.add(dircase) - sys.path[:] = L - return known_paths - -# XXX This should not be part of site.py, since it is needed even when -# using the -S option for Python. See http://www.python.org/sf/586680 -def addbuilddir(): - """Append ./build/lib.<platform> in case we're running in the build dir - (especially for Guido :-)""" - from distutils.util import get_platform - s = "build/lib.%s-%.3s" % (get_platform(), sys.version) - if hasattr(sys, 'gettotalrefcount'): - s += '-pydebug' - s = os.path.join(os.path.dirname(sys.path[-1]), s) - sys.path.append(s) - -def _init_pathinfo(): - """Return a set containing all existing directory entries from sys.path""" - d = set() - for dir in sys.path: - try: - if os.path.isdir(dir): - dir, dircase = makepath(dir) - d.add(dircase) - except TypeError: - continue - return d - -def addpackage(sitedir, name, known_paths): - """Add a new path to known_paths by combining sitedir and 'name' or execute - sitedir if it starts with 'import'""" - if known_paths is None: - _init_pathinfo() - reset = 1 - else: - reset = 0 - fullname = os.path.join(sitedir, name) - try: - f = open(fullname, "rU") - except IOError: - return - try: - for line in f: - if line.startswith("#"): - continue - if line.startswith("import"): - exec(line) - continue - line = line.rstrip() - dir, dircase = makepath(sitedir, line) - if not dircase in known_paths and os.path.exists(dir): - sys.path.append(dir) - known_paths.add(dircase) - finally: - f.close() - if reset: - known_paths = None - return known_paths - -def addsitedir(sitedir, known_paths=None): - """Add 'sitedir' argument to sys.path if missing and handle .pth files in - 'sitedir'""" - if known_paths is None: - known_paths = _init_pathinfo() - reset = 1 - else: - reset = 0 - sitedir, sitedircase = makepath(sitedir) - if not sitedircase in known_paths: - sys.path.append(sitedir) # Add path component - try: - names = os.listdir(sitedir) - except os.error: - return - names.sort() - for name in names: - if name.endswith(os.extsep + "pth"): - addpackage(sitedir, name, known_paths) - if reset: - known_paths = None - return known_paths - -def addsitepackages(known_paths, sys_prefix=sys.prefix, exec_prefix=sys.exec_prefix): - """Add site-packages (and possibly site-python) to sys.path""" - prefixes = [os.path.join(sys_prefix, "local"), sys_prefix] - if exec_prefix != sys_prefix: - prefixes.append(os.path.join(exec_prefix, "local")) - - for prefix in prefixes: - if prefix: - if sys.platform in ('os2emx', 'riscos') or _is_jython: - sitedirs = [os.path.join(prefix, "Lib", "site-packages")] - elif _is_pypy: - sitedirs = [os.path.join(prefix, 'site-packages')] - elif sys.platform == 'darwin' and prefix == sys_prefix: - - if prefix.startswith("/System/Library/Frameworks/"): # Apple's Python - - sitedirs = [os.path.join("/Library/Python", sys.version[:3], "site-packages"), - os.path.join(prefix, "Extras", "lib", "python")] - - else: # any other Python distros on OSX work this way - sitedirs = [os.path.join(prefix, "lib", - "python" + sys.version[:3], "site-packages")] - - elif os.sep == '/': - sitedirs = [os.path.join(prefix, - "lib", - "python" + sys.version[:3], - "site-packages"), - os.path.join(prefix, "lib", "site-python"), - os.path.join(prefix, "python" + sys.version[:3], "lib-dynload")] - lib64_dir = os.path.join(prefix, "lib64", "python" + sys.version[:3], "site-packages") - if (os.path.exists(lib64_dir) and - os.path.realpath(lib64_dir) not in [os.path.realpath(p) for p in sitedirs]): - if _is_64bit: - sitedirs.insert(0, lib64_dir) - else: - sitedirs.append(lib64_dir) - try: - # sys.getobjects only available in --with-pydebug build - sys.getobjects - sitedirs.insert(0, os.path.join(sitedirs[0], 'debug')) - except AttributeError: - pass - # Debian-specific dist-packages directories: - sitedirs.append(os.path.join(prefix, "local/lib", - "python" + sys.version[:3], - "dist-packages")) - if sys.version[0] == '2': - sitedirs.append(os.path.join(prefix, "lib", - "python" + sys.version[:3], - "dist-packages")) - else: - sitedirs.append(os.path.join(prefix, "lib", - "python" + sys.version[0], - "dist-packages")) - sitedirs.append(os.path.join(prefix, "lib", "dist-python")) - else: - sitedirs = [prefix, os.path.join(prefix, "lib", "site-packages")] - if sys.platform == 'darwin': - # for framework builds *only* we add the standard Apple - # locations. Currently only per-user, but /Library and - # /Network/Library could be added too - if 'Python.framework' in prefix: - home = os.environ.get('HOME') - if home: - sitedirs.append( - os.path.join(home, - 'Library', - 'Python', - sys.version[:3], - 'site-packages')) - for sitedir in sitedirs: - if os.path.isdir(sitedir): - addsitedir(sitedir, known_paths) - return None - -def check_enableusersite(): - """Check if user site directory is safe for inclusion - - The function tests for the command line flag (including environment var), - process uid/gid equal to effective uid/gid. - - None: Disabled for security reasons - False: Disabled by user (command line option) - True: Safe and enabled - """ - if hasattr(sys, 'flags') and getattr(sys.flags, 'no_user_site', False): - return False - - if hasattr(os, "getuid") and hasattr(os, "geteuid"): - # check process uid == effective uid - if os.geteuid() != os.getuid(): - return None - if hasattr(os, "getgid") and hasattr(os, "getegid"): - # check process gid == effective gid - if os.getegid() != os.getgid(): - return None - - return True - -def addusersitepackages(known_paths): - """Add a per user site-package to sys.path - - Each user has its own python directory with site-packages in the - home directory. - - USER_BASE is the root directory for all Python versions - - USER_SITE is the user specific site-packages directory - - USER_SITE/.. can be used for data. - """ - global USER_BASE, USER_SITE, ENABLE_USER_SITE - env_base = os.environ.get("PYTHONUSERBASE", None) - - def joinuser(*args): - return os.path.expanduser(os.path.join(*args)) - - #if sys.platform in ('os2emx', 'riscos'): - # # Don't know what to put here - # USER_BASE = '' - # USER_SITE = '' - if os.name == "nt": - base = os.environ.get("APPDATA") or "~" - if env_base: - USER_BASE = env_base - else: - USER_BASE = joinuser(base, "Python") - USER_SITE = os.path.join(USER_BASE, - "Python" + sys.version[0] + sys.version[2], - "site-packages") - else: - if env_base: - USER_BASE = env_base - else: - USER_BASE = joinuser("~", ".local") - USER_SITE = os.path.join(USER_BASE, "lib", - "python" + sys.version[:3], - "site-packages") - - if ENABLE_USER_SITE and os.path.isdir(USER_SITE): - addsitedir(USER_SITE, known_paths) - if ENABLE_USER_SITE: - for dist_libdir in ("lib", "local/lib"): - user_site = os.path.join(USER_BASE, dist_libdir, - "python" + sys.version[:3], - "dist-packages") - if os.path.isdir(user_site): - addsitedir(user_site, known_paths) - return known_paths - - - -def setBEGINLIBPATH(): - """The OS/2 EMX port has optional extension modules that do double duty - as DLLs (and must use the .DLL file extension) for other extensions. - The library search path needs to be amended so these will be found - during module import. Use BEGINLIBPATH so that these are at the start - of the library search path. - - """ - dllpath = os.path.join(sys.prefix, "Lib", "lib-dynload") - libpath = os.environ['BEGINLIBPATH'].split(';') - if libpath[-1]: - libpath.append(dllpath) - else: - libpath[-1] = dllpath - os.environ['BEGINLIBPATH'] = ';'.join(libpath) - - -def setquit(): - """Define new built-ins 'quit' and 'exit'. - These are simply strings that display a hint on how to exit. - - """ - if os.sep == ':': - eof = 'Cmd-Q' - elif os.sep == '\\': - eof = 'Ctrl-Z plus Return' - else: - eof = 'Ctrl-D (i.e. EOF)' - - class Quitter(object): - def __init__(self, name): - self.name = name - def __repr__(self): - return 'Use %s() or %s to exit' % (self.name, eof) - def __call__(self, code=None): - # Shells like IDLE catch the SystemExit, but listen when their - # stdin wrapper is closed. - try: - sys.stdin.close() - except: - pass - raise SystemExit(code) - builtins.quit = Quitter('quit') - builtins.exit = Quitter('exit') - - -class _Printer(object): - """interactive prompt objects for printing the license text, a list of - contributors and the copyright notice.""" - - MAXLINES = 23 - - def __init__(self, name, data, files=(), dirs=()): - self.__name = name - self.__data = data - self.__files = files - self.__dirs = dirs - self.__lines = None - - def __setup(self): - if self.__lines: - return - data = None - for dir in self.__dirs: - for filename in self.__files: - filename = os.path.join(dir, filename) - try: - fp = open(filename, "rU") - data = fp.read() - fp.close() - break - except IOError: - pass - if data: - break - if not data: - data = self.__data - self.__lines = data.split('\n') - self.__linecnt = len(self.__lines) - - def __repr__(self): - self.__setup() - if len(self.__lines) <= self.MAXLINES: - return "\n".join(self.__lines) - else: - return "Type %s() to see the full %s text" % ((self.__name,)*2) - - def __call__(self): - self.__setup() - prompt = 'Hit Return for more, or q (and Return) to quit: ' - lineno = 0 - while 1: - try: - for i in range(lineno, lineno + self.MAXLINES): - print(self.__lines[i]) - except IndexError: - break - else: - lineno += self.MAXLINES - key = None - while key is None: - try: - key = raw_input(prompt) - except NameError: - key = input(prompt) - if key not in ('', 'q'): - key = None - if key == 'q': - break - -def setcopyright(): - """Set 'copyright' and 'credits' in __builtin__""" - builtins.copyright = _Printer("copyright", sys.copyright) - if _is_jython: - builtins.credits = _Printer( - "credits", - "Jython is maintained by the Jython developers (www.jython.org).") - elif _is_pypy: - builtins.credits = _Printer( - "credits", - "PyPy is maintained by the PyPy developers: http://pypy.org/") - else: - builtins.credits = _Printer("credits", """\ - Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands - for supporting Python development. See www.python.org for more information.""") - here = os.path.dirname(os.__file__) - builtins.license = _Printer( - "license", "See http://www.python.org/%.3s/license.html" % sys.version, - ["LICENSE.txt", "LICENSE"], - [os.path.join(here, os.pardir), here, os.curdir]) - - -class _Helper(object): - """Define the built-in 'help'. - This is a wrapper around pydoc.help (with a twist). - - """ - - def __repr__(self): - return "Type help() for interactive help, " \ - "or help(object) for help about object." - def __call__(self, *args, **kwds): - import pydoc - return pydoc.help(*args, **kwds) - -def sethelper(): - builtins.help = _Helper() - -def aliasmbcs(): - """On Windows, some default encodings are not provided by Python, - while they are always available as "mbcs" in each locale. Make - them usable by aliasing to "mbcs" in such a case.""" - if sys.platform == 'win32': - import locale, codecs - enc = locale.getdefaultlocale()[1] - if enc.startswith('cp'): # "cp***" ? - try: - codecs.lookup(enc) - except LookupError: - import encodings - encodings._cache[enc] = encodings._unknown - encodings.aliases.aliases[enc] = 'mbcs' - -def setencoding(): - """Set the string encoding used by the Unicode implementation. The - default is 'ascii', but if you're willing to experiment, you can - change this.""" - encoding = "ascii" # Default value set by _PyUnicode_Init() - if 0: - # Enable to support locale aware default string encodings. - import locale - loc = locale.getdefaultlocale() - if loc[1]: - encoding = loc[1] - if 0: - # Enable to switch off string to Unicode coercion and implicit - # Unicode to string conversion. - encoding = "undefined" - if encoding != "ascii": - # On Non-Unicode builds this will raise an AttributeError... - sys.setdefaultencoding(encoding) # Needs Python Unicode build ! - - -def execsitecustomize(): - """Run custom site specific code, if available.""" - try: - import sitecustomize - except ImportError: - pass - -def virtual_install_main_packages(): - f = open(os.path.join(os.path.dirname(__file__), 'orig-prefix.txt')) - sys.real_prefix = f.read().strip() - f.close() - pos = 2 - hardcoded_relative_dirs = [] - if sys.path[0] == '': - pos += 1 - if _is_jython: - paths = [os.path.join(sys.real_prefix, 'Lib')] - elif _is_pypy: - if sys.version_info > (3, 2): - cpyver = '%d' % sys.version_info[0] - elif sys.pypy_version_info >= (1, 5): - cpyver = '%d.%d' % sys.version_info[:2] - else: - cpyver = '%d.%d.%d' % sys.version_info[:3] - paths = [os.path.join(sys.real_prefix, 'lib_pypy'), - os.path.join(sys.real_prefix, 'lib-python', cpyver)] - if sys.pypy_version_info < (1, 9): - paths.insert(1, os.path.join(sys.real_prefix, - 'lib-python', 'modified-%s' % cpyver)) - hardcoded_relative_dirs = paths[:] # for the special 'darwin' case below - # - # This is hardcoded in the Python executable, but relative to sys.prefix: - for path in paths[:]: - plat_path = os.path.join(path, 'plat-%s' % sys.platform) - if os.path.exists(plat_path): - paths.append(plat_path) - elif sys.platform == 'win32': - paths = [os.path.join(sys.real_prefix, 'Lib'), os.path.join(sys.real_prefix, 'DLLs')] - else: - paths = [os.path.join(sys.real_prefix, 'lib', 'python'+sys.version[:3])] - hardcoded_relative_dirs = paths[:] # for the special 'darwin' case below - lib64_path = os.path.join(sys.real_prefix, 'lib64', 'python'+sys.version[:3]) - if os.path.exists(lib64_path): - if _is_64bit: - paths.insert(0, lib64_path) - else: - paths.append(lib64_path) - # This is hardcoded in the Python executable, but relative to - # sys.prefix. Debian change: we need to add the multiarch triplet - # here, which is where the real stuff lives. As per PEP 421, in - # Python 3.3+, this lives in sys.implementation, while in Python 2.7 - # it lives in sys. - try: - arch = getattr(sys, 'implementation', sys)._multiarch - except AttributeError: - # This is a non-multiarch aware Python. Fallback to the old way. - arch = sys.platform - plat_path = os.path.join(sys.real_prefix, 'lib', - 'python'+sys.version[:3], - 'plat-%s' % arch) - if os.path.exists(plat_path): - paths.append(plat_path) - # This is hardcoded in the Python executable, but - # relative to sys.prefix, so we have to fix up: - for path in list(paths): - tk_dir = os.path.join(path, 'lib-tk') - if os.path.exists(tk_dir): - paths.append(tk_dir) - - # These are hardcoded in the Apple's Python executable, - # but relative to sys.prefix, so we have to fix them up: - if sys.platform == 'darwin': - hardcoded_paths = [os.path.join(relative_dir, module) - for relative_dir in hardcoded_relative_dirs - for module in ('plat-darwin', 'plat-mac', 'plat-mac/lib-scriptpackages')] - - for path in hardcoded_paths: - if os.path.exists(path): - paths.append(path) - - sys.path.extend(paths) - -def force_global_eggs_after_local_site_packages(): - """ - Force easy_installed eggs in the global environment to get placed - in sys.path after all packages inside the virtualenv. This - maintains the "least surprise" result that packages in the - virtualenv always mask global packages, never the other way - around. - - """ - egginsert = getattr(sys, '__egginsert', 0) - for i, path in enumerate(sys.path): - if i > egginsert and path.startswith(sys.prefix): - egginsert = i - sys.__egginsert = egginsert + 1 - -def virtual_addsitepackages(known_paths): - force_global_eggs_after_local_site_packages() - return addsitepackages(known_paths, sys_prefix=sys.real_prefix) - -def fixclasspath(): - """Adjust the special classpath sys.path entries for Jython. These - entries should follow the base virtualenv lib directories. - """ - paths = [] - classpaths = [] - for path in sys.path: - if path == '__classpath__' or path.startswith('__pyclasspath__'): - classpaths.append(path) - else: - paths.append(path) - sys.path = paths - sys.path.extend(classpaths) - -def execusercustomize(): - """Run custom user specific code, if available.""" - try: - import usercustomize - except ImportError: - pass - - -def main(): - global ENABLE_USER_SITE - virtual_install_main_packages() - abs__file__() - paths_in_sys = removeduppaths() - if (os.name == "posix" and sys.path and - os.path.basename(sys.path[-1]) == "Modules"): - addbuilddir() - if _is_jython: - fixclasspath() - GLOBAL_SITE_PACKAGES = not os.path.exists(os.path.join(os.path.dirname(__file__), 'no-global-site-packages.txt')) - if not GLOBAL_SITE_PACKAGES: - ENABLE_USER_SITE = False - if ENABLE_USER_SITE is None: - ENABLE_USER_SITE = check_enableusersite() - paths_in_sys = addsitepackages(paths_in_sys) - paths_in_sys = addusersitepackages(paths_in_sys) - if GLOBAL_SITE_PACKAGES: - paths_in_sys = virtual_addsitepackages(paths_in_sys) - if sys.platform == 'os2emx': - setBEGINLIBPATH() - setquit() - setcopyright() - sethelper() - aliasmbcs() - setencoding() - execsitecustomize() - if ENABLE_USER_SITE: - execusercustomize() - # Remove sys.setdefaultencoding() so that users cannot change the - # encoding after initialization. The test for presence is needed when - # this module is run as a script, because this code is executed twice. - if hasattr(sys, "setdefaultencoding"): - del sys.setdefaultencoding - -main() - -def _script(): - help = """\ - %s [--user-base] [--user-site] - - Without arguments print some useful information - With arguments print the value of USER_BASE and/or USER_SITE separated - by '%s'. - - Exit codes with --user-base or --user-site: - 0 - user site directory is enabled - 1 - user site directory is disabled by user - 2 - uses site directory is disabled by super user - or for security reasons - >2 - unknown error - """ - args = sys.argv[1:] - if not args: - print("sys.path = [") - for dir in sys.path: - print(" %r," % (dir,)) - print("]") - def exists(path): - if os.path.isdir(path): - return "exists" - else: - return "doesn't exist" - print("USER_BASE: %r (%s)" % (USER_BASE, exists(USER_BASE))) - print("USER_SITE: %r (%s)" % (USER_SITE, exists(USER_BASE))) - print("ENABLE_USER_SITE: %r" % ENABLE_USER_SITE) - sys.exit(0) - - buffer = [] - if '--user-base' in args: - buffer.append(USER_BASE) - if '--user-site' in args: - buffer.append(USER_SITE) - - if buffer: - print(os.pathsep.join(buffer)) - if ENABLE_USER_SITE: - sys.exit(0) - elif ENABLE_USER_SITE is False: - sys.exit(1) - elif ENABLE_USER_SITE is None: - sys.exit(2) - else: - sys.exit(3) - else: - import textwrap - print(textwrap.dedent(help % (sys.argv[0], os.pathsep))) - sys.exit(10) - -if __name__ == '__main__': - _script()
--- a/test/lib/python2.7/sre.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -"""This file is only retained for backwards compatibility. -It will be removed in the future. sre was moved to re in version 2.5. -""" - -import warnings -warnings.warn("The sre module is deprecated, please import re.", - DeprecationWarning, 2) - -from re import * -from re import __all__ - -# old pickles expect the _compile() reconstructor in this module -from re import _compile
--- a/test/lib/python2.7/sre_compile.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,596 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Secret Labs' Regular Expression Engine -# -# convert template to internal format -# -# Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. -# -# See the sre.py file for information on usage and redistribution. -# - -"""Internal support module for sre""" - -import _sre, sys -import sre_parse -from sre_constants import * - -assert _sre.MAGIC == MAGIC, "SRE module mismatch" - -if _sre.CODESIZE == 2: - MAXCODE = 65535 -else: - MAXCODE = 0xFFFFFFFFL - -_LITERAL_CODES = set([LITERAL, NOT_LITERAL]) -_REPEATING_CODES = set([REPEAT, MIN_REPEAT, MAX_REPEAT]) -_SUCCESS_CODES = set([SUCCESS, FAILURE]) -_ASSERT_CODES = set([ASSERT, ASSERT_NOT]) - -# Sets of lowercase characters which have the same uppercase. -_equivalences = ( - # LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I - (0x69, 0x131), # iı - # LATIN SMALL LETTER S, LATIN SMALL LETTER LONG S - (0x73, 0x17f), # sÅ¿ - # MICRO SIGN, GREEK SMALL LETTER MU - (0xb5, 0x3bc), # µμ - # COMBINING GREEK YPOGEGRAMMENI, GREEK SMALL LETTER IOTA, GREEK PROSGEGRAMMENI - (0x345, 0x3b9, 0x1fbe), # \u0345ιι - # GREEK SMALL LETTER BETA, GREEK BETA SYMBOL - (0x3b2, 0x3d0), # Î²Ï - # GREEK SMALL LETTER EPSILON, GREEK LUNATE EPSILON SYMBOL - (0x3b5, 0x3f5), # εϵ - # GREEK SMALL LETTER THETA, GREEK THETA SYMBOL - (0x3b8, 0x3d1), # θϑ - # GREEK SMALL LETTER KAPPA, GREEK KAPPA SYMBOL - (0x3ba, 0x3f0), # κϰ - # GREEK SMALL LETTER PI, GREEK PI SYMBOL - (0x3c0, 0x3d6), # πϖ - # GREEK SMALL LETTER RHO, GREEK RHO SYMBOL - (0x3c1, 0x3f1), # Ïϱ - # GREEK SMALL LETTER FINAL SIGMA, GREEK SMALL LETTER SIGMA - (0x3c2, 0x3c3), # ςσ - # GREEK SMALL LETTER PHI, GREEK PHI SYMBOL - (0x3c6, 0x3d5), # φϕ - # LATIN SMALL LETTER S WITH DOT ABOVE, LATIN SMALL LETTER LONG S WITH DOT ABOVE - (0x1e61, 0x1e9b), # ṡẛ -) - -# Maps the lowercase code to lowercase codes which have the same uppercase. -_ignorecase_fixes = {i: tuple(j for j in t if i != j) - for t in _equivalences for i in t} - -def _compile(code, pattern, flags): - # internal: compile a (sub)pattern - emit = code.append - _len = len - LITERAL_CODES = _LITERAL_CODES - REPEATING_CODES = _REPEATING_CODES - SUCCESS_CODES = _SUCCESS_CODES - ASSERT_CODES = _ASSERT_CODES - if (flags & SRE_FLAG_IGNORECASE and - not (flags & SRE_FLAG_LOCALE) and - flags & SRE_FLAG_UNICODE): - fixes = _ignorecase_fixes - else: - fixes = None - for op, av in pattern: - if op in LITERAL_CODES: - if flags & SRE_FLAG_IGNORECASE: - lo = _sre.getlower(av, flags) - if fixes and lo in fixes: - emit(OPCODES[IN_IGNORE]) - skip = _len(code); emit(0) - if op is NOT_LITERAL: - emit(OPCODES[NEGATE]) - for k in (lo,) + fixes[lo]: - emit(OPCODES[LITERAL]) - emit(k) - emit(OPCODES[FAILURE]) - code[skip] = _len(code) - skip - else: - emit(OPCODES[OP_IGNORE[op]]) - emit(lo) - else: - emit(OPCODES[op]) - emit(av) - elif op is IN: - if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) - def fixup(literal, flags=flags): - return _sre.getlower(literal, flags) - else: - emit(OPCODES[op]) - fixup = None - skip = _len(code); emit(0) - _compile_charset(av, flags, code, fixup, fixes) - code[skip] = _len(code) - skip - elif op is ANY: - if flags & SRE_FLAG_DOTALL: - emit(OPCODES[ANY_ALL]) - else: - emit(OPCODES[ANY]) - elif op in REPEATING_CODES: - if flags & SRE_FLAG_TEMPLATE: - raise error, "internal: unsupported template operator" - emit(OPCODES[REPEAT]) - skip = _len(code); emit(0) - emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - emit(OPCODES[SUCCESS]) - code[skip] = _len(code) - skip - elif _simple(av) and op is not REPEAT: - if op is MAX_REPEAT: - emit(OPCODES[REPEAT_ONE]) - else: - emit(OPCODES[MIN_REPEAT_ONE]) - skip = _len(code); emit(0) - emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - emit(OPCODES[SUCCESS]) - code[skip] = _len(code) - skip - else: - emit(OPCODES[REPEAT]) - skip = _len(code); emit(0) - emit(av[0]) - emit(av[1]) - _compile(code, av[2], flags) - code[skip] = _len(code) - skip - if op is MAX_REPEAT: - emit(OPCODES[MAX_UNTIL]) - else: - emit(OPCODES[MIN_UNTIL]) - elif op is SUBPATTERN: - if av[0]: - emit(OPCODES[MARK]) - emit((av[0]-1)*2) - # _compile_info(code, av[1], flags) - _compile(code, av[1], flags) - if av[0]: - emit(OPCODES[MARK]) - emit((av[0]-1)*2+1) - elif op in SUCCESS_CODES: - emit(OPCODES[op]) - elif op in ASSERT_CODES: - emit(OPCODES[op]) - skip = _len(code); emit(0) - if av[0] >= 0: - emit(0) # look ahead - else: - lo, hi = av[1].getwidth() - if lo != hi: - raise error, "look-behind requires fixed-width pattern" - emit(lo) # look behind - _compile(code, av[1], flags) - emit(OPCODES[SUCCESS]) - code[skip] = _len(code) - skip - elif op is CALL: - emit(OPCODES[op]) - skip = _len(code); emit(0) - _compile(code, av, flags) - emit(OPCODES[SUCCESS]) - code[skip] = _len(code) - skip - elif op is AT: - emit(OPCODES[op]) - if flags & SRE_FLAG_MULTILINE: - av = AT_MULTILINE.get(av, av) - if flags & SRE_FLAG_LOCALE: - av = AT_LOCALE.get(av, av) - elif flags & SRE_FLAG_UNICODE: - av = AT_UNICODE.get(av, av) - emit(ATCODES[av]) - elif op is BRANCH: - emit(OPCODES[op]) - tail = [] - tailappend = tail.append - for av in av[1]: - skip = _len(code); emit(0) - # _compile_info(code, av, flags) - _compile(code, av, flags) - emit(OPCODES[JUMP]) - tailappend(_len(code)); emit(0) - code[skip] = _len(code) - skip - emit(0) # end of branch - for tail in tail: - code[tail] = _len(code) - tail - elif op is CATEGORY: - emit(OPCODES[op]) - if flags & SRE_FLAG_LOCALE: - av = CH_LOCALE[av] - elif flags & SRE_FLAG_UNICODE: - av = CH_UNICODE[av] - emit(CHCODES[av]) - elif op is GROUPREF: - if flags & SRE_FLAG_IGNORECASE: - emit(OPCODES[OP_IGNORE[op]]) - else: - emit(OPCODES[op]) - emit(av-1) - elif op is GROUPREF_EXISTS: - emit(OPCODES[op]) - emit(av[0]-1) - skipyes = _len(code); emit(0) - _compile(code, av[1], flags) - if av[2]: - emit(OPCODES[JUMP]) - skipno = _len(code); emit(0) - code[skipyes] = _len(code) - skipyes + 1 - _compile(code, av[2], flags) - code[skipno] = _len(code) - skipno - else: - code[skipyes] = _len(code) - skipyes + 1 - else: - raise ValueError, ("unsupported operand type", op) - -def _compile_charset(charset, flags, code, fixup=None, fixes=None): - # compile charset subprogram - emit = code.append - for op, av in _optimize_charset(charset, fixup, fixes, - flags & SRE_FLAG_UNICODE): - emit(OPCODES[op]) - if op is NEGATE: - pass - elif op is LITERAL: - emit(av) - elif op is RANGE: - emit(av[0]) - emit(av[1]) - elif op is CHARSET: - code.extend(av) - elif op is BIGCHARSET: - code.extend(av) - elif op is CATEGORY: - if flags & SRE_FLAG_LOCALE: - emit(CHCODES[CH_LOCALE[av]]) - elif flags & SRE_FLAG_UNICODE: - emit(CHCODES[CH_UNICODE[av]]) - else: - emit(CHCODES[av]) - else: - raise error, "internal: unsupported set operator" - emit(OPCODES[FAILURE]) - -def _optimize_charset(charset, fixup, fixes, isunicode): - # internal: optimize character set - out = [] - tail = [] - charmap = bytearray(256) - for op, av in charset: - while True: - try: - if op is LITERAL: - if fixup: - i = fixup(av) - charmap[i] = 1 - if fixes and i in fixes: - for k in fixes[i]: - charmap[k] = 1 - else: - charmap[av] = 1 - elif op is RANGE: - r = range(av[0], av[1]+1) - if fixup: - r = map(fixup, r) - if fixup and fixes: - for i in r: - charmap[i] = 1 - if i in fixes: - for k in fixes[i]: - charmap[k] = 1 - else: - for i in r: - charmap[i] = 1 - elif op is NEGATE: - out.append((op, av)) - else: - tail.append((op, av)) - except IndexError: - if len(charmap) == 256: - # character set contains non-UCS1 character codes - charmap += b'\0' * 0xff00 - continue - # character set contains non-BMP character codes - if fixup and isunicode and op is RANGE: - lo, hi = av - ranges = [av] - # There are only two ranges of cased astral characters: - # 10400-1044F (Deseret) and 118A0-118DF (Warang Citi). - _fixup_range(max(0x10000, lo), min(0x11fff, hi), - ranges, fixup) - for lo, hi in ranges: - if lo == hi: - tail.append((LITERAL, hi)) - else: - tail.append((RANGE, (lo, hi))) - else: - tail.append((op, av)) - break - - # compress character map - runs = [] - q = 0 - while True: - p = charmap.find(b'\1', q) - if p < 0: - break - if len(runs) >= 2: - runs = None - break - q = charmap.find(b'\0', p) - if q < 0: - runs.append((p, len(charmap))) - break - runs.append((p, q)) - if runs is not None: - # use literal/range - for p, q in runs: - if q - p == 1: - out.append((LITERAL, p)) - else: - out.append((RANGE, (p, q - 1))) - out += tail - # if the case was changed or new representation is more compact - if fixup or len(out) < len(charset): - return out - # else original character set is good enough - return charset - - # use bitmap - if len(charmap) == 256: - data = _mk_bitmap(charmap) - out.append((CHARSET, data)) - out += tail - return out - - # To represent a big charset, first a bitmap of all characters in the - # set is constructed. Then, this bitmap is sliced into chunks of 256 - # characters, duplicate chunks are eliminated, and each chunk is - # given a number. In the compiled expression, the charset is - # represented by a 32-bit word sequence, consisting of one word for - # the number of different chunks, a sequence of 256 bytes (64 words) - # of chunk numbers indexed by their original chunk position, and a - # sequence of 256-bit chunks (8 words each). - - # Compression is normally good: in a typical charset, large ranges of - # Unicode will be either completely excluded (e.g. if only cyrillic - # letters are to be matched), or completely included (e.g. if large - # subranges of Kanji match). These ranges will be represented by - # chunks of all one-bits or all zero-bits. - - # Matching can be also done efficiently: the more significant byte of - # the Unicode character is an index into the chunk number, and the - # less significant byte is a bit index in the chunk (just like the - # CHARSET matching). - - # In UCS-4 mode, the BIGCHARSET opcode still supports only subsets - # of the basic multilingual plane; an efficient representation - # for all of Unicode has not yet been developed. - - charmap = bytes(charmap) # should be hashable - comps = {} - mapping = bytearray(256) - block = 0 - data = bytearray() - for i in range(0, 65536, 256): - chunk = charmap[i: i + 256] - if chunk in comps: - mapping[i // 256] = comps[chunk] - else: - mapping[i // 256] = comps[chunk] = block - block += 1 - data += chunk - data = _mk_bitmap(data) - data[0:0] = [block] + _bytes_to_codes(mapping) - out.append((BIGCHARSET, data)) - out += tail - return out - -def _fixup_range(lo, hi, ranges, fixup): - for i in map(fixup, range(lo, hi+1)): - for k, (lo, hi) in enumerate(ranges): - if i < lo: - if l == lo - 1: - ranges[k] = (i, hi) - else: - ranges.insert(k, (i, i)) - break - elif i > hi: - if i == hi + 1: - ranges[k] = (lo, i) - break - else: - break - else: - ranges.append((i, i)) - -_CODEBITS = _sre.CODESIZE * 8 -_BITS_TRANS = b'0' + b'1' * 255 -def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int): - s = bytes(bits).translate(_BITS_TRANS)[::-1] - return [_int(s[i - _CODEBITS: i], 2) - for i in range(len(s), 0, -_CODEBITS)] - -def _bytes_to_codes(b): - # Convert block indices to word array - import array - if _sre.CODESIZE == 2: - code = 'H' - else: - code = 'I' - a = array.array(code, bytes(b)) - assert a.itemsize == _sre.CODESIZE - assert len(a) * a.itemsize == len(b) - return a.tolist() - -def _simple(av): - # check if av is a "simple" operator - lo, hi = av[2].getwidth() - return lo == hi == 1 and av[2][0][0] != SUBPATTERN - -def _compile_info(code, pattern, flags): - # internal: compile an info block. in the current version, - # this contains min/max pattern width, and an optional literal - # prefix or a character map - lo, hi = pattern.getwidth() - if lo == 0: - return # not worth it - # look for a literal prefix - prefix = [] - prefixappend = prefix.append - prefix_skip = 0 - charset = [] # not used - charsetappend = charset.append - if not (flags & SRE_FLAG_IGNORECASE): - # look for literal prefix - for op, av in pattern.data: - if op is LITERAL: - if len(prefix) == prefix_skip: - prefix_skip = prefix_skip + 1 - prefixappend(av) - elif op is SUBPATTERN and len(av[1]) == 1: - op, av = av[1][0] - if op is LITERAL: - prefixappend(av) - else: - break - else: - break - # if no prefix, look for charset prefix - if not prefix and pattern.data: - op, av = pattern.data[0] - if op is SUBPATTERN and av[1]: - op, av = av[1][0] - if op is LITERAL: - charsetappend((op, av)) - elif op is BRANCH: - c = [] - cappend = c.append - for p in av[1]: - if not p: - break - op, av = p[0] - if op is LITERAL: - cappend((op, av)) - else: - break - else: - charset = c - elif op is BRANCH: - c = [] - cappend = c.append - for p in av[1]: - if not p: - break - op, av = p[0] - if op is LITERAL: - cappend((op, av)) - else: - break - else: - charset = c - elif op is IN: - charset = av -## if prefix: -## print "*** PREFIX", prefix, prefix_skip -## if charset: -## print "*** CHARSET", charset - # add an info block - emit = code.append - emit(OPCODES[INFO]) - skip = len(code); emit(0) - # literal flag - mask = 0 - if prefix: - mask = SRE_INFO_PREFIX - if len(prefix) == prefix_skip == len(pattern.data): - mask = mask + SRE_INFO_LITERAL - elif charset: - mask = mask + SRE_INFO_CHARSET - emit(mask) - # pattern length - if lo < MAXCODE: - emit(lo) - else: - emit(MAXCODE) - prefix = prefix[:MAXCODE] - if hi < MAXCODE: - emit(hi) - else: - emit(0) - # add literal prefix - if prefix: - emit(len(prefix)) # length - emit(prefix_skip) # skip - code.extend(prefix) - # generate overlap table - table = [-1] + ([0]*len(prefix)) - for i in xrange(len(prefix)): - table[i+1] = table[i]+1 - while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]: - table[i+1] = table[table[i+1]-1]+1 - code.extend(table[1:]) # don't store first entry - elif charset: - _compile_charset(charset, flags, code) - code[skip] = len(code) - skip - -try: - unicode -except NameError: - STRING_TYPES = (type(""),) -else: - STRING_TYPES = (type(""), type(unicode(""))) - -def isstring(obj): - for tp in STRING_TYPES: - if isinstance(obj, tp): - return 1 - return 0 - -def _code(p, flags): - - flags = p.pattern.flags | flags - code = [] - - # compile info block - _compile_info(code, p, flags) - - # compile the pattern - _compile(code, p.data, flags) - - code.append(OPCODES[SUCCESS]) - - return code - -def compile(p, flags=0): - # internal: convert pattern list to internal format - - if isstring(p): - pattern = p - p = sre_parse.parse(p, flags) - else: - pattern = None - - code = _code(p, flags) - - # print code - - # XXX: <fl> get rid of this limitation! - if p.pattern.groups > 100: - raise AssertionError( - "sorry, but this version only supports 100 named groups" - ) - - # map in either direction - groupindex = p.pattern.groupdict - indexgroup = [None] * p.pattern.groups - for k, i in groupindex.items(): - indexgroup[i] = k - - return _sre.compile( - pattern, flags | p.pattern.flags, code, - p.pattern.groups-1, - groupindex, indexgroup - )
--- a/test/lib/python2.7/sre_constants.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,263 +0,0 @@ -# -# Secret Labs' Regular Expression Engine -# -# various symbols used by the regular expression engine. -# run this script to update the _sre include files! -# -# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. -# -# See the sre.py file for information on usage and redistribution. -# - -"""Internal support module for sre""" - -# update when constants are added or removed - -MAGIC = 20031017 - -try: - from _sre import MAXREPEAT -except ImportError: - import _sre - MAXREPEAT = _sre.MAXREPEAT = 65535 - -# SRE standard exception (access as sre.error) -# should this really be here? - -class error(Exception): - pass - -# operators - -FAILURE = "failure" -SUCCESS = "success" - -ANY = "any" -ANY_ALL = "any_all" -ASSERT = "assert" -ASSERT_NOT = "assert_not" -AT = "at" -BIGCHARSET = "bigcharset" -BRANCH = "branch" -CALL = "call" -CATEGORY = "category" -CHARSET = "charset" -GROUPREF = "groupref" -GROUPREF_IGNORE = "groupref_ignore" -GROUPREF_EXISTS = "groupref_exists" -IN = "in" -IN_IGNORE = "in_ignore" -INFO = "info" -JUMP = "jump" -LITERAL = "literal" -LITERAL_IGNORE = "literal_ignore" -MARK = "mark" -MAX_REPEAT = "max_repeat" -MAX_UNTIL = "max_until" -MIN_REPEAT = "min_repeat" -MIN_UNTIL = "min_until" -NEGATE = "negate" -NOT_LITERAL = "not_literal" -NOT_LITERAL_IGNORE = "not_literal_ignore" -RANGE = "range" -REPEAT = "repeat" -REPEAT_ONE = "repeat_one" -SUBPATTERN = "subpattern" -MIN_REPEAT_ONE = "min_repeat_one" - -# positions -AT_BEGINNING = "at_beginning" -AT_BEGINNING_LINE = "at_beginning_line" -AT_BEGINNING_STRING = "at_beginning_string" -AT_BOUNDARY = "at_boundary" -AT_NON_BOUNDARY = "at_non_boundary" -AT_END = "at_end" -AT_END_LINE = "at_end_line" -AT_END_STRING = "at_end_string" -AT_LOC_BOUNDARY = "at_loc_boundary" -AT_LOC_NON_BOUNDARY = "at_loc_non_boundary" -AT_UNI_BOUNDARY = "at_uni_boundary" -AT_UNI_NON_BOUNDARY = "at_uni_non_boundary" - -# categories -CATEGORY_DIGIT = "category_digit" -CATEGORY_NOT_DIGIT = "category_not_digit" -CATEGORY_SPACE = "category_space" -CATEGORY_NOT_SPACE = "category_not_space" -CATEGORY_WORD = "category_word" -CATEGORY_NOT_WORD = "category_not_word" -CATEGORY_LINEBREAK = "category_linebreak" -CATEGORY_NOT_LINEBREAK = "category_not_linebreak" -CATEGORY_LOC_WORD = "category_loc_word" -CATEGORY_LOC_NOT_WORD = "category_loc_not_word" -CATEGORY_UNI_DIGIT = "category_uni_digit" -CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit" -CATEGORY_UNI_SPACE = "category_uni_space" -CATEGORY_UNI_NOT_SPACE = "category_uni_not_space" -CATEGORY_UNI_WORD = "category_uni_word" -CATEGORY_UNI_NOT_WORD = "category_uni_not_word" -CATEGORY_UNI_LINEBREAK = "category_uni_linebreak" -CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak" - -OPCODES = [ - - # failure=0 success=1 (just because it looks better that way :-) - FAILURE, SUCCESS, - - ANY, ANY_ALL, - ASSERT, ASSERT_NOT, - AT, - BRANCH, - CALL, - CATEGORY, - CHARSET, BIGCHARSET, - GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE, - IN, IN_IGNORE, - INFO, - JUMP, - LITERAL, LITERAL_IGNORE, - MARK, - MAX_UNTIL, - MIN_UNTIL, - NOT_LITERAL, NOT_LITERAL_IGNORE, - NEGATE, - RANGE, - REPEAT, - REPEAT_ONE, - SUBPATTERN, - MIN_REPEAT_ONE - -] - -ATCODES = [ - AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY, - AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING, - AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY, - AT_UNI_NON_BOUNDARY -] - -CHCODES = [ - CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE, - CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD, - CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD, - CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT, - CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD, - CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK, - CATEGORY_UNI_NOT_LINEBREAK -] - -def makedict(list): - d = {} - i = 0 - for item in list: - d[item] = i - i = i + 1 - return d - -OPCODES = makedict(OPCODES) -ATCODES = makedict(ATCODES) -CHCODES = makedict(CHCODES) - -# replacement operations for "ignore case" mode -OP_IGNORE = { - GROUPREF: GROUPREF_IGNORE, - IN: IN_IGNORE, - LITERAL: LITERAL_IGNORE, - NOT_LITERAL: NOT_LITERAL_IGNORE -} - -AT_MULTILINE = { - AT_BEGINNING: AT_BEGINNING_LINE, - AT_END: AT_END_LINE -} - -AT_LOCALE = { - AT_BOUNDARY: AT_LOC_BOUNDARY, - AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY -} - -AT_UNICODE = { - AT_BOUNDARY: AT_UNI_BOUNDARY, - AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY -} - -CH_LOCALE = { - CATEGORY_DIGIT: CATEGORY_DIGIT, - CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT, - CATEGORY_SPACE: CATEGORY_SPACE, - CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE, - CATEGORY_WORD: CATEGORY_LOC_WORD, - CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD, - CATEGORY_LINEBREAK: CATEGORY_LINEBREAK, - CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK -} - -CH_UNICODE = { - CATEGORY_DIGIT: CATEGORY_UNI_DIGIT, - CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT, - CATEGORY_SPACE: CATEGORY_UNI_SPACE, - CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE, - CATEGORY_WORD: CATEGORY_UNI_WORD, - CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD, - CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK, - CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK -} - -# flags -SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking) -SRE_FLAG_IGNORECASE = 2 # case insensitive -SRE_FLAG_LOCALE = 4 # honour system locale -SRE_FLAG_MULTILINE = 8 # treat target as multiline string -SRE_FLAG_DOTALL = 16 # treat target as a single string -SRE_FLAG_UNICODE = 32 # use unicode locale -SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments -SRE_FLAG_DEBUG = 128 # debugging - -# flags for INFO primitive -SRE_INFO_PREFIX = 1 # has prefix -SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix) -SRE_INFO_CHARSET = 4 # pattern starts with character from given set - -if __name__ == "__main__": - def dump(f, d, prefix): - items = d.items() - items.sort(key=lambda a: a[1]) - for k, v in items: - f.write("#define %s_%s %s\n" % (prefix, k.upper(), v)) - f = open("sre_constants.h", "w") - f.write("""\ -/* - * Secret Labs' Regular Expression Engine - * - * regular expression matching engine - * - * NOTE: This file is generated by sre_constants.py. If you need - * to change anything in here, edit sre_constants.py and run it. - * - * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. - * - * See the _sre.c file for information on usage and redistribution. - */ - -""") - - f.write("#define SRE_MAGIC %d\n" % MAGIC) - - dump(f, OPCODES, "SRE_OP") - dump(f, ATCODES, "SRE") - dump(f, CHCODES, "SRE") - - f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE) - f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE) - f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE) - f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE) - f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL) - f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE) - f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE) - - f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX) - f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL) - f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET) - - f.close() - print "done"
--- a/test/lib/python2.7/sre_parse.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,834 +0,0 @@ -# -# Secret Labs' Regular Expression Engine -# -# convert re-style regular expression to sre pattern -# -# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. -# -# See the sre.py file for information on usage and redistribution. -# - -"""Internal support module for sre""" - -# XXX: show string offset and offending character for all errors - -import sys - -from sre_constants import * - -SPECIAL_CHARS = ".\\[{()*+?^$|" -REPEAT_CHARS = "*+?{" - -DIGITS = set("0123456789") - -OCTDIGITS = set("01234567") -HEXDIGITS = set("0123456789abcdefABCDEF") - -WHITESPACE = set(" \t\n\r\v\f") - -ESCAPES = { - r"\a": (LITERAL, ord("\a")), - r"\b": (LITERAL, ord("\b")), - r"\f": (LITERAL, ord("\f")), - r"\n": (LITERAL, ord("\n")), - r"\r": (LITERAL, ord("\r")), - r"\t": (LITERAL, ord("\t")), - r"\v": (LITERAL, ord("\v")), - r"\\": (LITERAL, ord("\\")) -} - -CATEGORIES = { - r"\A": (AT, AT_BEGINNING_STRING), # start of string - r"\b": (AT, AT_BOUNDARY), - r"\B": (AT, AT_NON_BOUNDARY), - r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]), - r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]), - r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]), - r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), - r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), - r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), - r"\Z": (AT, AT_END_STRING), # end of string -} - -FLAGS = { - # standard flags - "i": SRE_FLAG_IGNORECASE, - "L": SRE_FLAG_LOCALE, - "m": SRE_FLAG_MULTILINE, - "s": SRE_FLAG_DOTALL, - "x": SRE_FLAG_VERBOSE, - # extensions - "t": SRE_FLAG_TEMPLATE, - "u": SRE_FLAG_UNICODE, -} - -class Pattern: - # master pattern object. keeps track of global attributes - def __init__(self): - self.flags = 0 - self.open = [] - self.groups = 1 - self.groupdict = {} - self.lookbehind = 0 - - def opengroup(self, name=None): - gid = self.groups - self.groups = gid + 1 - if name is not None: - ogid = self.groupdict.get(name, None) - if ogid is not None: - raise error, ("redefinition of group name %s as group %d; " - "was group %d" % (repr(name), gid, ogid)) - self.groupdict[name] = gid - self.open.append(gid) - return gid - def closegroup(self, gid): - self.open.remove(gid) - def checkgroup(self, gid): - return gid < self.groups and gid not in self.open - -class SubPattern: - # a subpattern, in intermediate form - def __init__(self, pattern, data=None): - self.pattern = pattern - if data is None: - data = [] - self.data = data - self.width = None - def dump(self, level=0): - seqtypes = (tuple, list) - for op, av in self.data: - print level*" " + op, - if op == IN: - # member sublanguage - print - for op, a in av: - print (level+1)*" " + op, a - elif op == BRANCH: - print - for i, a in enumerate(av[1]): - if i: - print level*" " + "or" - a.dump(level+1) - elif op == GROUPREF_EXISTS: - condgroup, item_yes, item_no = av - print condgroup - item_yes.dump(level+1) - if item_no: - print level*" " + "else" - item_no.dump(level+1) - elif isinstance(av, seqtypes): - nl = 0 - for a in av: - if isinstance(a, SubPattern): - if not nl: - print - a.dump(level+1) - nl = 1 - else: - print a, - nl = 0 - if not nl: - print - else: - print av - def __repr__(self): - return repr(self.data) - def __len__(self): - return len(self.data) - def __delitem__(self, index): - del self.data[index] - def __getitem__(self, index): - if isinstance(index, slice): - return SubPattern(self.pattern, self.data[index]) - return self.data[index] - def __setitem__(self, index, code): - self.data[index] = code - def insert(self, index, code): - self.data.insert(index, code) - def append(self, code): - self.data.append(code) - def getwidth(self): - # determine the width (min, max) for this subpattern - if self.width: - return self.width - lo = hi = 0 - UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY) - REPEATCODES = (MIN_REPEAT, MAX_REPEAT) - for op, av in self.data: - if op is BRANCH: - i = MAXREPEAT - 1 - j = 0 - for av in av[1]: - l, h = av.getwidth() - i = min(i, l) - j = max(j, h) - lo = lo + i - hi = hi + j - elif op is CALL: - i, j = av.getwidth() - lo = lo + i - hi = hi + j - elif op is SUBPATTERN: - i, j = av[1].getwidth() - lo = lo + i - hi = hi + j - elif op in REPEATCODES: - i, j = av[2].getwidth() - lo = lo + i * av[0] - hi = hi + j * av[1] - elif op in UNITCODES: - lo = lo + 1 - hi = hi + 1 - elif op == SUCCESS: - break - self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT) - return self.width - -class Tokenizer: - def __init__(self, string): - self.string = string - self.index = 0 - self.__next() - def __next(self): - if self.index >= len(self.string): - self.next = None - return - char = self.string[self.index] - if char[0] == "\\": - try: - c = self.string[self.index + 1] - except IndexError: - raise error, "bogus escape (end of line)" - char = char + c - self.index = self.index + len(char) - self.next = char - def match(self, char, skip=1): - if char == self.next: - if skip: - self.__next() - return 1 - return 0 - def get(self): - this = self.next - self.__next() - return this - def tell(self): - return self.index, self.next - def seek(self, index): - self.index, self.next = index - -def isident(char): - return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_" - -def isdigit(char): - return "0" <= char <= "9" - -def isname(name): - # check that group name is a valid string - if not isident(name[0]): - return False - for char in name[1:]: - if not isident(char) and not isdigit(char): - return False - return True - -def _class_escape(source, escape): - # handle escape code inside character class - code = ESCAPES.get(escape) - if code: - return code - code = CATEGORIES.get(escape) - if code and code[0] == IN: - return code - try: - c = escape[1:2] - if c == "x": - # hexadecimal escape (exactly two digits) - while source.next in HEXDIGITS and len(escape) < 4: - escape = escape + source.get() - escape = escape[2:] - if len(escape) != 2: - raise error, "bogus escape: %s" % repr("\\" + escape) - return LITERAL, int(escape, 16) & 0xff - elif c in OCTDIGITS: - # octal escape (up to three digits) - while source.next in OCTDIGITS and len(escape) < 4: - escape = escape + source.get() - escape = escape[1:] - return LITERAL, int(escape, 8) & 0xff - elif c in DIGITS: - raise error, "bogus escape: %s" % repr(escape) - if len(escape) == 2: - return LITERAL, ord(escape[1]) - except ValueError: - pass - raise error, "bogus escape: %s" % repr(escape) - -def _escape(source, escape, state): - # handle escape code in expression - code = CATEGORIES.get(escape) - if code: - return code - code = ESCAPES.get(escape) - if code: - return code - try: - c = escape[1:2] - if c == "x": - # hexadecimal escape - while source.next in HEXDIGITS and len(escape) < 4: - escape = escape + source.get() - if len(escape) != 4: - raise ValueError - return LITERAL, int(escape[2:], 16) & 0xff - elif c == "0": - # octal escape - while source.next in OCTDIGITS and len(escape) < 4: - escape = escape + source.get() - return LITERAL, int(escape[1:], 8) & 0xff - elif c in DIGITS: - # octal escape *or* decimal group reference (sigh) - if source.next in DIGITS: - escape = escape + source.get() - if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and - source.next in OCTDIGITS): - # got three octal digits; this is an octal escape - escape = escape + source.get() - return LITERAL, int(escape[1:], 8) & 0xff - # not an octal escape, so this is a group reference - group = int(escape[1:]) - if group < state.groups: - if not state.checkgroup(group): - raise error, "cannot refer to open group" - if state.lookbehind: - import warnings - warnings.warn('group references in lookbehind ' - 'assertions are not supported', - RuntimeWarning) - return GROUPREF, group - raise ValueError - if len(escape) == 2: - return LITERAL, ord(escape[1]) - except ValueError: - pass - raise error, "bogus escape: %s" % repr(escape) - -def _parse_sub(source, state, nested=1): - # parse an alternation: a|b|c - - items = [] - itemsappend = items.append - sourcematch = source.match - while 1: - itemsappend(_parse(source, state)) - if sourcematch("|"): - continue - if not nested: - break - if not source.next or sourcematch(")", 0): - break - else: - raise error, "pattern not properly closed" - - if len(items) == 1: - return items[0] - - subpattern = SubPattern(state) - subpatternappend = subpattern.append - - # check if all items share a common prefix - while 1: - prefix = None - for item in items: - if not item: - break - if prefix is None: - prefix = item[0] - elif item[0] != prefix: - break - else: - # all subitems start with a common "prefix". - # move it out of the branch - for item in items: - del item[0] - subpatternappend(prefix) - continue # check next one - break - - # check if the branch can be replaced by a character set - for item in items: - if len(item) != 1 or item[0][0] != LITERAL: - break - else: - # we can store this as a character set instead of a - # branch (the compiler may optimize this even more) - set = [] - setappend = set.append - for item in items: - setappend(item[0]) - subpatternappend((IN, set)) - return subpattern - - subpattern.append((BRANCH, (None, items))) - return subpattern - -def _parse_sub_cond(source, state, condgroup): - item_yes = _parse(source, state) - if source.match("|"): - item_no = _parse(source, state) - if source.match("|"): - raise error, "conditional backref with more than two branches" - else: - item_no = None - if source.next and not source.match(")", 0): - raise error, "pattern not properly closed" - subpattern = SubPattern(state) - subpattern.append((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) - return subpattern - -_PATTERNENDERS = set("|)") -_ASSERTCHARS = set("=!<") -_LOOKBEHINDASSERTCHARS = set("=!") -_REPEATCODES = set([MIN_REPEAT, MAX_REPEAT]) - -def _parse(source, state): - # parse a simple pattern - subpattern = SubPattern(state) - - # precompute constants into local variables - subpatternappend = subpattern.append - sourceget = source.get - sourcematch = source.match - _len = len - PATTERNENDERS = _PATTERNENDERS - ASSERTCHARS = _ASSERTCHARS - LOOKBEHINDASSERTCHARS = _LOOKBEHINDASSERTCHARS - REPEATCODES = _REPEATCODES - - while 1: - - if source.next in PATTERNENDERS: - break # end of subpattern - this = sourceget() - if this is None: - break # end of pattern - - if state.flags & SRE_FLAG_VERBOSE: - # skip whitespace and comments - if this in WHITESPACE: - continue - if this == "#": - while 1: - this = sourceget() - if this in (None, "\n"): - break - continue - - if this and this[0] not in SPECIAL_CHARS: - subpatternappend((LITERAL, ord(this))) - - elif this == "[": - # character set - set = [] - setappend = set.append -## if sourcematch(":"): -## pass # handle character classes - if sourcematch("^"): - setappend((NEGATE, None)) - # check remaining characters - start = set[:] - while 1: - this = sourceget() - if this == "]" and set != start: - break - elif this and this[0] == "\\": - code1 = _class_escape(source, this) - elif this: - code1 = LITERAL, ord(this) - else: - raise error, "unexpected end of regular expression" - if sourcematch("-"): - # potential range - this = sourceget() - if this == "]": - if code1[0] is IN: - code1 = code1[1][0] - setappend(code1) - setappend((LITERAL, ord("-"))) - break - elif this: - if this[0] == "\\": - code2 = _class_escape(source, this) - else: - code2 = LITERAL, ord(this) - if code1[0] != LITERAL or code2[0] != LITERAL: - raise error, "bad character range" - lo = code1[1] - hi = code2[1] - if hi < lo: - raise error, "bad character range" - setappend((RANGE, (lo, hi))) - else: - raise error, "unexpected end of regular expression" - else: - if code1[0] is IN: - code1 = code1[1][0] - setappend(code1) - - # XXX: <fl> should move set optimization to compiler! - if _len(set)==1 and set[0][0] is LITERAL: - subpatternappend(set[0]) # optimization - elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL: - subpatternappend((NOT_LITERAL, set[1][1])) # optimization - else: - # XXX: <fl> should add charmap optimization here - subpatternappend((IN, set)) - - elif this and this[0] in REPEAT_CHARS: - # repeat previous item - if this == "?": - min, max = 0, 1 - elif this == "*": - min, max = 0, MAXREPEAT - - elif this == "+": - min, max = 1, MAXREPEAT - elif this == "{": - if source.next == "}": - subpatternappend((LITERAL, ord(this))) - continue - here = source.tell() - min, max = 0, MAXREPEAT - lo = hi = "" - while source.next in DIGITS: - lo = lo + source.get() - if sourcematch(","): - while source.next in DIGITS: - hi = hi + sourceget() - else: - hi = lo - if not sourcematch("}"): - subpatternappend((LITERAL, ord(this))) - source.seek(here) - continue - if lo: - min = int(lo) - if min >= MAXREPEAT: - raise OverflowError("the repetition number is too large") - if hi: - max = int(hi) - if max >= MAXREPEAT: - raise OverflowError("the repetition number is too large") - if max < min: - raise error("bad repeat interval") - else: - raise error, "not supported" - # figure out which item to repeat - if subpattern: - item = subpattern[-1:] - else: - item = None - if not item or (_len(item) == 1 and item[0][0] == AT): - raise error, "nothing to repeat" - if item[0][0] in REPEATCODES: - raise error, "multiple repeat" - if sourcematch("?"): - subpattern[-1] = (MIN_REPEAT, (min, max, item)) - else: - subpattern[-1] = (MAX_REPEAT, (min, max, item)) - - elif this == ".": - subpatternappend((ANY, None)) - - elif this == "(": - group = 1 - name = None - condgroup = None - if sourcematch("?"): - group = 0 - # options - if sourcematch("P"): - # python extensions - if sourcematch("<"): - # named group: skip forward to end of name - name = "" - while 1: - char = sourceget() - if char is None: - raise error, "unterminated name" - if char == ">": - break - name = name + char - group = 1 - if not name: - raise error("missing group name") - if not isname(name): - raise error("bad character in group name %r" % - name) - elif sourcematch("="): - # named backreference - name = "" - while 1: - char = sourceget() - if char is None: - raise error, "unterminated name" - if char == ")": - break - name = name + char - if not name: - raise error("missing group name") - if not isname(name): - raise error("bad character in backref group name " - "%r" % name) - gid = state.groupdict.get(name) - if gid is None: - msg = "unknown group name: {0!r}".format(name) - raise error(msg) - if state.lookbehind: - import warnings - warnings.warn('group references in lookbehind ' - 'assertions are not supported', - RuntimeWarning) - subpatternappend((GROUPREF, gid)) - continue - else: - char = sourceget() - if char is None: - raise error, "unexpected end of pattern" - raise error, "unknown specifier: ?P%s" % char - elif sourcematch(":"): - # non-capturing group - group = 2 - elif sourcematch("#"): - # comment - while 1: - if source.next is None or source.next == ")": - break - sourceget() - if not sourcematch(")"): - raise error, "unbalanced parenthesis" - continue - elif source.next in ASSERTCHARS: - # lookahead assertions - char = sourceget() - dir = 1 - if char == "<": - if source.next not in LOOKBEHINDASSERTCHARS: - raise error, "syntax error" - dir = -1 # lookbehind - char = sourceget() - state.lookbehind += 1 - p = _parse_sub(source, state) - if dir < 0: - state.lookbehind -= 1 - if not sourcematch(")"): - raise error, "unbalanced parenthesis" - if char == "=": - subpatternappend((ASSERT, (dir, p))) - else: - subpatternappend((ASSERT_NOT, (dir, p))) - continue - elif sourcematch("("): - # conditional backreference group - condname = "" - while 1: - char = sourceget() - if char is None: - raise error, "unterminated name" - if char == ")": - break - condname = condname + char - group = 2 - if not condname: - raise error("missing group name") - if isname(condname): - condgroup = state.groupdict.get(condname) - if condgroup is None: - msg = "unknown group name: {0!r}".format(condname) - raise error(msg) - else: - try: - condgroup = int(condname) - except ValueError: - raise error, "bad character in group name" - if state.lookbehind: - import warnings - warnings.warn('group references in lookbehind ' - 'assertions are not supported', - RuntimeWarning) - else: - # flags - if not source.next in FLAGS: - raise error, "unexpected end of pattern" - while source.next in FLAGS: - state.flags = state.flags | FLAGS[sourceget()] - if group: - # parse group contents - if group == 2: - # anonymous group - group = None - else: - group = state.opengroup(name) - if condgroup: - p = _parse_sub_cond(source, state, condgroup) - else: - p = _parse_sub(source, state) - if not sourcematch(")"): - raise error, "unbalanced parenthesis" - if group is not None: - state.closegroup(group) - subpatternappend((SUBPATTERN, (group, p))) - else: - while 1: - char = sourceget() - if char is None: - raise error, "unexpected end of pattern" - if char == ")": - break - raise error, "unknown extension" - - elif this == "^": - subpatternappend((AT, AT_BEGINNING)) - - elif this == "$": - subpattern.append((AT, AT_END)) - - elif this and this[0] == "\\": - code = _escape(source, this, state) - subpatternappend(code) - - else: - raise error, "parser error" - - return subpattern - -def parse(str, flags=0, pattern=None): - # parse 're' pattern into list of (opcode, argument) tuples - - source = Tokenizer(str) - - if pattern is None: - pattern = Pattern() - pattern.flags = flags - pattern.str = str - - p = _parse_sub(source, pattern, 0) - - tail = source.get() - if tail == ")": - raise error, "unbalanced parenthesis" - elif tail: - raise error, "bogus characters at end of regular expression" - - if not (flags & SRE_FLAG_VERBOSE) and p.pattern.flags & SRE_FLAG_VERBOSE: - # the VERBOSE flag was switched on inside the pattern. to be - # on the safe side, we'll parse the whole thing again... - return parse(str, p.pattern.flags) - - if flags & SRE_FLAG_DEBUG: - p.dump() - - return p - -def parse_template(source, pattern): - # parse 're' replacement string into list of literals and - # group references - s = Tokenizer(source) - sget = s.get - p = [] - a = p.append - def literal(literal, p=p, pappend=a): - if p and p[-1][0] is LITERAL: - p[-1] = LITERAL, p[-1][1] + literal - else: - pappend((LITERAL, literal)) - sep = source[:0] - if type(sep) is type(""): - makechar = chr - else: - makechar = unichr - while 1: - this = sget() - if this is None: - break # end of replacement string - if this and this[0] == "\\": - # group - c = this[1:2] - if c == "g": - name = "" - if s.match("<"): - while 1: - char = sget() - if char is None: - raise error, "unterminated group name" - if char == ">": - break - name = name + char - if not name: - raise error, "missing group name" - try: - index = int(name) - if index < 0: - raise error, "negative group number" - except ValueError: - if not isname(name): - raise error, "bad character in group name" - try: - index = pattern.groupindex[name] - except KeyError: - msg = "unknown group name: {0!r}".format(name) - raise IndexError(msg) - a((MARK, index)) - elif c == "0": - if s.next in OCTDIGITS: - this = this + sget() - if s.next in OCTDIGITS: - this = this + sget() - literal(makechar(int(this[1:], 8) & 0xff)) - elif c in DIGITS: - isoctal = False - if s.next in DIGITS: - this = this + sget() - if (c in OCTDIGITS and this[2] in OCTDIGITS and - s.next in OCTDIGITS): - this = this + sget() - isoctal = True - literal(makechar(int(this[1:], 8) & 0xff)) - if not isoctal: - a((MARK, int(this[1:]))) - else: - try: - this = makechar(ESCAPES[this][1]) - except KeyError: - pass - literal(this) - else: - literal(this) - # convert template to groups and literals lists - i = 0 - groups = [] - groupsappend = groups.append - literals = [None] * len(p) - for c, s in p: - if c is MARK: - groupsappend((i, s)) - # literal[i] is already None - else: - literals[i] = s - i = i + 1 - return groups, literals - -def expand_template(template, match): - g = match.group - sep = match.string[:0] - groups, literals = template - literals = literals[:] - try: - for index, group in groups: - literals[index] = s = g(group) - if s is None: - raise error, "unmatched group" - except IndexError: - raise error, "invalid group reference" - return sep.join(literals)
--- a/test/lib/python2.7/stat.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -"""Constants/functions for interpreting results of os.stat() and os.lstat(). - -Suggested usage: from stat import * -""" - -# Indices for stat struct members in the tuple returned by os.stat() - -ST_MODE = 0 -ST_INO = 1 -ST_DEV = 2 -ST_NLINK = 3 -ST_UID = 4 -ST_GID = 5 -ST_SIZE = 6 -ST_ATIME = 7 -ST_MTIME = 8 -ST_CTIME = 9 - -# Extract bits from the mode - -def S_IMODE(mode): - return mode & 07777 - -def S_IFMT(mode): - return mode & 0170000 - -# Constants used as S_IFMT() for various file types -# (not all are implemented on all systems) - -S_IFDIR = 0040000 -S_IFCHR = 0020000 -S_IFBLK = 0060000 -S_IFREG = 0100000 -S_IFIFO = 0010000 -S_IFLNK = 0120000 -S_IFSOCK = 0140000 - -# Functions to test for each file type - -def S_ISDIR(mode): - return S_IFMT(mode) == S_IFDIR - -def S_ISCHR(mode): - return S_IFMT(mode) == S_IFCHR - -def S_ISBLK(mode): - return S_IFMT(mode) == S_IFBLK - -def S_ISREG(mode): - return S_IFMT(mode) == S_IFREG - -def S_ISFIFO(mode): - return S_IFMT(mode) == S_IFIFO - -def S_ISLNK(mode): - return S_IFMT(mode) == S_IFLNK - -def S_ISSOCK(mode): - return S_IFMT(mode) == S_IFSOCK - -# Names for permission bits - -S_ISUID = 04000 -S_ISGID = 02000 -S_ENFMT = S_ISGID -S_ISVTX = 01000 -S_IREAD = 00400 -S_IWRITE = 00200 -S_IEXEC = 00100 -S_IRWXU = 00700 -S_IRUSR = 00400 -S_IWUSR = 00200 -S_IXUSR = 00100 -S_IRWXG = 00070 -S_IRGRP = 00040 -S_IWGRP = 00020 -S_IXGRP = 00010 -S_IRWXO = 00007 -S_IROTH = 00004 -S_IWOTH = 00002 -S_IXOTH = 00001 - -# Names for file flags - -UF_NODUMP = 0x00000001 -UF_IMMUTABLE = 0x00000002 -UF_APPEND = 0x00000004 -UF_OPAQUE = 0x00000008 -UF_NOUNLINK = 0x00000010 -UF_COMPRESSED = 0x00000020 # OS X: file is hfs-compressed -UF_HIDDEN = 0x00008000 # OS X: file should not be displayed -SF_ARCHIVED = 0x00010000 -SF_IMMUTABLE = 0x00020000 -SF_APPEND = 0x00040000 -SF_NOUNLINK = 0x00100000 -SF_SNAPSHOT = 0x00200000
--- a/test/lib/python2.7/types.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,86 +0,0 @@ -"""Define names for all type symbols known in the standard interpreter. - -Types that are part of optional modules (e.g. array) are not listed. -""" -import sys - -# Iterators in Python aren't a matter of type but of protocol. A large -# and changing number of builtin types implement *some* flavor of -# iterator. Don't check the type! Use hasattr to check for both -# "__iter__" and "next" attributes instead. - -NoneType = type(None) -TypeType = type -ObjectType = object - -IntType = int -LongType = long -FloatType = float -BooleanType = bool -try: - ComplexType = complex -except NameError: - pass - -StringType = str - -# StringTypes is already outdated. Instead of writing "type(x) in -# types.StringTypes", you should use "isinstance(x, basestring)". But -# we keep around for compatibility with Python 2.2. -try: - UnicodeType = unicode - StringTypes = (StringType, UnicodeType) -except NameError: - StringTypes = (StringType,) - -BufferType = buffer - -TupleType = tuple -ListType = list -DictType = DictionaryType = dict - -def _f(): pass -FunctionType = type(_f) -LambdaType = type(lambda: None) # Same as FunctionType -CodeType = type(_f.func_code) - -def _g(): - yield 1 -GeneratorType = type(_g()) - -class _C: - def _m(self): pass -ClassType = type(_C) -UnboundMethodType = type(_C._m) # Same as MethodType -_x = _C() -InstanceType = type(_x) -MethodType = type(_x._m) - -BuiltinFunctionType = type(len) -BuiltinMethodType = type([].append) # Same as BuiltinFunctionType - -ModuleType = type(sys) -FileType = file -XRangeType = xrange - -try: - raise TypeError -except TypeError: - tb = sys.exc_info()[2] - TracebackType = type(tb) - FrameType = type(tb.tb_frame) - del tb - -SliceType = slice -EllipsisType = type(Ellipsis) - -DictProxyType = type(TypeType.__dict__) -NotImplementedType = type(NotImplemented) - -# For Jython, the following two types are identical -GetSetDescriptorType = type(FunctionType.func_code) -MemberDescriptorType = type(FunctionType.func_globals) - -del sys, _f, _g, _C, _x # Not for export - -__all__ = list(n for n in globals() if n[:1] != '_')
--- a/test/lib/python2.7/warnings.py Thu May 18 19:06:49 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,422 +0,0 @@ -"""Python part of the warnings subsystem.""" - -# Note: function level imports should *not* be used -# in this module as it may cause import lock deadlock. -# See bug 683658. -import linecache -import sys -import types - -__all__ = ["warn", "warn_explicit", "showwarning", - "formatwarning", "filterwarnings", "simplefilter", - "resetwarnings", "catch_warnings"] - - -def warnpy3k(message, category=None, stacklevel=1): - """Issue a deprecation warning for Python 3.x related changes. - - Warnings are omitted unless Python is started with the -3 option. - """ - if sys.py3kwarning: - if category is None: - category = DeprecationWarning - warn(message, category, stacklevel+1) - -def _show_warning(message, category, filename, lineno, file=None, line=None): - """Hook to write a warning to a file; replace if you like.""" - if file is None: - file = sys.stderr - if file is None: - # sys.stderr is None - warnings get lost - return - try: - file.write(formatwarning(message, category, filename, lineno, line)) - except (IOError, UnicodeError): - pass # the file (probably stderr) is invalid - this warning gets lost. -# Keep a working version around in case the deprecation of the old API is -# triggered. -showwarning = _show_warning - -def formatwarning(message, category, filename, lineno, line=None): - """Function to format a warning the standard way.""" - try: - unicodetype = unicode - except NameError: - unicodetype = () - try: - message = str(message) - except UnicodeEncodeError: - pass - s = "%s: %s: %s\n" % (lineno, category.__name__, message) - line = linecache.getline(filename, lineno) if line is None else line - if line: - line = line.strip() - if isinstance(s, unicodetype) and isinstance(line, str): - line = unicode(line, 'latin1') - s += " %s\n" % line - if isinstance(s, unicodetype) and isinstance(filename, str): - enc = sys.getfilesystemencoding() - if enc: - try: - filename = unicode(filename, enc) - except UnicodeDecodeError: - pass - s = "%s:%s" % (filename, s) - return s - -def filterwarnings(action, message="", category=Warning, module="", lineno=0, - append=0): - """Insert an entry into the list of warnings filters (at the front). - - 'action' -- one of "error", "ignore", "always", "default", "module", - or "once" - 'message' -- a regex that the warning message must match - 'category' -- a class that the warning must be a subclass of - 'module' -- a regex that the module name must match - 'lineno' -- an integer line number, 0 matches all warnings - 'append' -- if true, append to the list of filters - """ - import re - assert action in ("error", "ignore", "always", "default", "module", - "once"), "invalid action: %r" % (action,) - assert isinstance(message, basestring), "message must be a string" - assert isinstance(category, (type, types.ClassType)), \ - "category must be a class" - assert issubclass(category, Warning), "category must be a Warning subclass" - assert isinstance(module, basestring), "module must be a string" - assert isinstance(lineno, int) and lineno >= 0, \ - "lineno must be an int >= 0" - item = (action, re.compile(message, re.I), category, - re.compile(module), lineno) - if append: - filters.append(item) - else: - filters.insert(0, item) - -def simplefilter(action, category=Warning, lineno=0, append=0): - """Insert a simple entry into the list of warnings filters (at the front). - - A simple filter matches all modules and messages. - 'action' -- one of "error", "ignore", "always", "default", "module", - or "once" - 'category' -- a class that the warning must be a subclass of - 'lineno' -- an integer line number, 0 matches all warnings - 'append' -- if true, append to the list of filters - """ - assert action in ("error", "ignore", "always", "default", "module", - "once"), "invalid action: %r" % (action,) - assert isinstance(lineno, int) and lineno >= 0, \ - "lineno must be an int >= 0" - item = (action, None, category, None, lineno) - if append: - filters.append(item) - else: - filters.insert(0, item) - -def resetwarnings(): - """Clear the list of warning filters, so that no filters are active.""" - filters[:] = [] - -class _OptionError(Exception): - """Exception used by option processing helpers.""" - pass - -# Helper to process -W options passed via sys.warnoptions -def _processoptions(args): - for arg in args: - try: - _setoption(arg) - except _OptionError, msg: - print >>sys.stderr, "Invalid -W option ignored:", msg - -# Helper for _processoptions() -def _setoption(arg): - import re - parts = arg.split(':') - if len(parts) > 5: - raise _OptionError("too many fields (max 5): %r" % (arg,)) - while len(parts) < 5: - parts.append('') - action, message, category, module, lineno = [s.strip() - for s in parts] - action = _getaction(action) - message = re.escape(message) - category = _getcategory(category) - module = re.escape(module) - if module: - module = module + '$' - if lineno: - try: - lineno = int(lineno) - if lineno < 0: - raise ValueError - except (ValueError, OverflowError): - raise _OptionError("invalid lineno %r" % (lineno,)) - else: - lineno = 0 - filterwarnings(action, message, category, module, lineno) - -# Helper for _setoption() -def _getaction(action): - if not action: - return "default" - if action == "all": return "always" # Alias - for a in ('default', 'always', 'ignore', 'module', 'once', 'error'): - if a.startswith(action): - return a - raise _OptionError("invalid action: %r" % (action,)) - -# Helper for _setoption() -def _getcategory(category): - import re - if not category: - return Warning - if re.match("^[a-zA-Z0-9_]+$", category): - try: - cat = eval(category) - except NameError: - raise _OptionError("unknown warning category: %r" % (category,)) - else: - i = category.rfind(".") - module = category[:i] - klass = category[i+1:] - try: - m = __import__(module, None, None, [klass]) - except ImportError: - raise _OptionError("invalid module name: %r" % (module,)) - try: - cat = getattr(m, klass) - except AttributeError: - raise _OptionError("unknown warning category: %r" % (category,)) - if not issubclass(cat, Warning): - raise _OptionError("invalid warning category: %r" % (category,)) - return cat - - -# Code typically replaced by _warnings -def warn(message, category=None, stacklevel=1): - """Issue a warning, or maybe ignore it or raise an exception.""" - # Check if message is already a Warning object - if isinstance(message, Warning): - category = message.__class__ - # Check category argument - if category is None: - category = UserWarning - assert issubclass(category, Warning) - # Get context information - try: - caller = sys._getframe(stacklevel) - except ValueError: - globals = sys.__dict__ - lineno = 1 - else: - globals = caller.f_globals - lineno = caller.f_lineno - if '__name__' in globals: - module = globals['__name__'] - else: - module = "<string>" - filename = globals.get('__file__') - if filename: - fnl = filename.lower() - if fnl.endswith((".pyc", ".pyo")): - filename = filename[:-1] - else: - if module == "__main__": - try: - filename = sys.argv[0] - except AttributeError: - # embedded interpreters don't have sys.argv, see bug #839151 - filename = '__main__' - if not filename: - filename = module - registry = globals.setdefault("__warningregistry__", {}) - warn_explicit(message, category, filename, lineno, module, registry, - globals) - -def warn_explicit(message, category, filename, lineno, - module=None, registry=None, module_globals=None): - lineno = int(lineno) - if module is None: - module = filename or "<unknown>" - if module[-3:].lower() == ".py": - module = module[:-3] # XXX What about leading pathname? - if registry is None: - registry = {} - if isinstance(message, Warning): - text = str(message) - category = message.__class__ - else: - text = message - message = category(message) - key = (text, category, lineno) - # Quick test for common case - if registry.get(key): - return - # Search the filters - for item in filters: - action, msg, cat, mod, ln = item - if ((msg is None or msg.match(text)) and - issubclass(category, cat) and - (mod is None or mod.match(module)) and - (ln == 0 or lineno == ln)): - break - else: - action = defaultaction - # Early exit actions - if action == "ignore": - registry[key] = 1 - return - - # Prime the linecache for formatting, in case the - # "file" is actually in a zipfile or something. - linecache.getlines(filename, module_globals) - - if action == "error": - raise message - # Other actions - if action == "once": - registry[key] = 1 - oncekey = (text, category) - if onceregistry.get(oncekey): - return - onceregistry[oncekey] = 1 - elif action == "always": - pass - elif action == "module": - registry[key] = 1 - altkey = (text, category, 0) - if registry.get(altkey): - return - registry[altkey] = 1 - elif action == "default": - registry[key] = 1 - else: - # Unrecognized actions are errors - raise RuntimeError( - "Unrecognized action (%r) in warnings.filters:\n %s" % - (action, item)) - # Print message and context - showwarning(message, category, filename, lineno) - - -class WarningMessage(object): - - """Holds the result of a single showwarning() call.""" - - _WARNING_DETAILS = ("message", "category", "filename", "lineno", "file", - "line") - - def __init__(self, message, category, filename, lineno, file=None, - line=None): - local_values = locals() - for attr in self._WARNING_DETAILS: - setattr(self, attr, local_values[attr]) - self._category_name = category.__name__ if category else None - - def __str__(self): - return ("{message : %r, category : %r, filename : %r, lineno : %s, " - "line : %r}" % (self.message, self._category_name, - self.filename, self.lineno, self.line)) - - -class catch_warnings(object): - - """A context manager that copies and restores the warnings filter upon - exiting the context. - - The 'record' argument specifies whether warnings should be captured by a - custom implementation of warnings.showwarning() and be appended to a list - returned by the context manager. Otherwise None is returned by the context - manager. The objects appended to the list are arguments whose attributes - mirror the arguments to showwarning(). - - The 'module' argument is to specify an alternative module to the module - named 'warnings' and imported under that name. This argument is only useful - when testing the warnings module itself. - - """ - - def __init__(self, record=False, module=None): - """Specify whether to record warnings and if an alternative module - should be used other than sys.modules['warnings']. - - For compatibility with Python 3.0, please consider all arguments to be - keyword-only. - - """ - self._record = record - self._module = sys.modules['warnings'] if module is None else module - self._entered = False - - def __repr__(self): - args = [] - if self._record: - args.append("record=True") - if self._module is not sys.modules['warnings']: - args.append("module=%r" % self._module) - name = type(self).__name__ - return "%s(%s)" % (name, ", ".join(args)) - - def __enter__(self): - if self._entered: - raise RuntimeError("Cannot enter %r twice" % self) - self._entered = True - self._filters = self._module.filters - self._module.filters = self._filters[:] - self._showwarning = self._module.showwarning - if self._record: - log = [] - def showwarning(*args, **kwargs): - log.append(WarningMessage(*args, **kwargs)) - self._module.showwarning = showwarning - return log - else: - return None - - def __exit__(self, *exc_info): - if not self._entered: - raise RuntimeError("Cannot exit %r without entering first" % self) - self._module.filters = self._filters - self._module.showwarning = self._showwarning - - -# filters contains a sequence of filter 5-tuples -# The components of the 5-tuple are: -# - an action: error, ignore, always, default, module, or once -# - a compiled regex that must match the warning message -# - a class representing the warning category -# - a compiled regex that must match the module that is being warned -# - a line number for the line being warning, or 0 to mean any line -# If either if the compiled regexs are None, match anything. -_warnings_defaults = False -try: - from _warnings import (filters, default_action, once_registry, - warn, warn_explicit) - defaultaction = default_action - onceregistry = once_registry - _warnings_defaults = True -except ImportError: - filters = [] - defaultaction = "default" - onceregistry = {} - - -# Module initialization -_processoptions(sys.warnoptions) -if not _warnings_defaults: - silence = [ImportWarning, PendingDeprecationWarning] - # Don't silence DeprecationWarning if -3 or -Q was used. - if not sys.py3kwarning and not sys.flags.division_warning: - silence.append(DeprecationWarning) - for cls in silence: - simplefilter("ignore", category=cls) - bytes_warning = sys.flags.bytes_warning - if bytes_warning > 1: - bytes_action = "error" - elif bytes_warning: - bytes_action = "default" - else: - bytes_action = "ignore" - simplefilter(bytes_action, category=BytesWarning, append=1) -del _warnings_defaults
